mirror of
				git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
				synced 2025-09-18 22:14:16 +00:00 
			
		
		
		
	RCU pull request for v6.9
This pull request contains the following branches:
 
 rcu-doc.2024.02.14a: Documentation updates.
 
 rcu-nocb.2024.02.14a: RCU NOCB updates, code cleanups, unnecessary
         barrier removals and minor bug fixes.
 
 rcu-exp.2024.02.14a: RCU exp, fixing a circular dependency between
         workqueue and RCU expedited callback handling.
 
 rcu-tasks.2024.02.26a: RCU tasks, avoiding deadlocks in do_exit() when
         calling synchronize_rcu_tasks() with a mutex held, maintaining
 	real-time response in rcu_tasks_postscan() and a minor
         fix for tasks trace quiescence check.
 
 rcu-misc.2024.02.14a: Misc updates, comments and readability
 	improvement, boot time parameter for lazy RCU and rcutorture
 	improvement.
 -----BEGIN PGP SIGNATURE-----
 
 iQFJBAABCAAzFiEEj5IosQTPz8XU1wRHSXnow7UH+rgFAmXev80VHGJvcXVuLmZl
 bmdAZ21haWwuY29tAAoJEEl56MO1B/q4UYgH/3CQF495sAS58M3tsy/HCMbq8DUb
 9AoIKCdzqvN2xzjYxHHs59jA+MdEIOGbSIx1yWk0KZSqRSfxwd9nGbxO5EHbz6L3
 gdZdOHbpZHPmtcUbdOfXDyhy4JaF+EBuRp9FOnsJ+w4/a0lFWMinaic4BweMEESS
 y+gD5fcMzzCthedXn/HeQpeYUKOQ8Jpth5K5s4CkeaehEbdRVLFxjwFgQYd8Oeqn
 0SfjNMRdBubDxydi4Rx1Ado7mKnfBHoot+9l0PHi6T2Rq89H0AUn/Dj3YOEkW7QT
 aKRSVpPJnG3EFHUUzwprODAoQGOC6EpTVpxSqnpO2ewHnnMPhz/IXzRT86w=
 =gypc
 -----END PGP SIGNATURE-----
Merge tag 'rcu.next.v6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/boqun/linux
Pull RCU updates from Boqun Feng:
 - Eliminate deadlocks involving do_exit() and RCU tasks, by Paul:
   Instead of SRCU read side critical sections, now a percpu list is
   used in do_exit() for scanning yet-to-exit tasks
 - Fix a deadlock due to the dependency between workqueue and RCU
   expedited grace period, reported by Anna-Maria Behnsen and Thomas
   Gleixner and fixed by Frederic: Now RCU expedited always uses its own
   kthread worker instead of a workqueue
 - RCU NOCB updates, code cleanups, unnecessary barrier removals and
   minor bug fixes
 - Maintain real-time response in rcu_tasks_postscan() and a minor fix
   for tasks trace quiescence check
 - Misc updates, comments and readability improvement, boot time
   parameter for lazy RCU and rcutorture improvement
 - Documentation updates
* tag 'rcu.next.v6.9' of git://git.kernel.org/pub/scm/linux/kernel/git/boqun/linux: (34 commits)
  rcu-tasks: Maintain real-time response in rcu_tasks_postscan()
  rcu-tasks: Eliminate deadlocks involving do_exit() and RCU tasks
  rcu-tasks: Maintain lists to eliminate RCU-tasks/do_exit() deadlocks
  rcu-tasks: Initialize data to eliminate RCU-tasks/do_exit() deadlocks
  rcu-tasks: Initialize callback lists at rcu_init() time
  rcu-tasks: Add data to eliminate RCU-tasks/do_exit() deadlocks
  rcu-tasks: Repair RCU Tasks Trace quiescence check
  rcu/sync: remove un-used rcu_sync_enter_start function
  rcutorture: Suppress rtort_pipe_count warnings until after stalls
  srcu: Improve comments about acceleration leak
  rcu: Provide a boot time parameter to control lazy RCU
  rcu: Rename jiffies_till_flush to jiffies_lazy_flush
  doc: Update checklist.rst discussion of callback execution
  doc: Clarify use of slab constructors and SLAB_TYPESAFE_BY_RCU
  context_tracking: Fix kerneldoc headers for __ct_user_{enter,exit}()
  doc: Add EARLY flag to early-parsed kernel boot parameters
  doc: Add CONFIG_RCU_STRICT_GRACE_PERIOD to checklist.rst
  doc: Make checklist.rst note that spinlocks are implied RCU readers
  doc: Make whatisRCU.rst note that spinlocks are RCU readers
  doc: Spinlocks are implied RCU readers
  ...
			
			
This commit is contained in:
		
						commit
						e5a3878c94
					
				
					 24 changed files with 687 additions and 560 deletions
				
			
		|  | @ -68,7 +68,8 @@ over a rather long period of time, but improvements are always welcome! | |||
| 	rcu_read_lock_sched(), or by the appropriate update-side lock. | ||||
| 	Explicit disabling of preemption (preempt_disable(), for example) | ||||
| 	can serve as rcu_read_lock_sched(), but is less readable and | ||||
| 	prevents lockdep from detecting locking issues. | ||||
| 	prevents lockdep from detecting locking issues.  Acquiring a | ||||
| 	spinlock also enters an RCU read-side critical section. | ||||
| 
 | ||||
| 	Please note that you *cannot* rely on code known to be built | ||||
| 	only in non-preemptible kernels.  Such code can and will break, | ||||
|  | @ -382,16 +383,17 @@ over a rather long period of time, but improvements are always welcome! | |||
| 	must use whatever locking or other synchronization is required | ||||
| 	to safely access and/or modify that data structure. | ||||
| 
 | ||||
| 	Do not assume that RCU callbacks will be executed on the same | ||||
| 	CPU that executed the corresponding call_rcu() or call_srcu(). | ||||
| 	For example, if a given CPU goes offline while having an RCU | ||||
| 	callback pending, then that RCU callback will execute on some | ||||
| 	surviving CPU.	(If this was not the case, a self-spawning RCU | ||||
| 	callback would prevent the victim CPU from ever going offline.) | ||||
| 	Furthermore, CPUs designated by rcu_nocbs= might well *always* | ||||
| 	have their RCU callbacks executed on some other CPUs, in fact, | ||||
| 	for some  real-time workloads, this is the whole point of using | ||||
| 	the rcu_nocbs= kernel boot parameter. | ||||
| 	Do not assume that RCU callbacks will be executed on | ||||
| 	the same CPU that executed the corresponding call_rcu(), | ||||
| 	call_srcu(), call_rcu_tasks(), call_rcu_tasks_rude(), or | ||||
| 	call_rcu_tasks_trace().  For example, if a given CPU goes offline | ||||
| 	while having an RCU callback pending, then that RCU callback | ||||
| 	will execute on some surviving CPU.  (If this was not the case, | ||||
| 	a self-spawning RCU callback would prevent the victim CPU from | ||||
| 	ever going offline.)  Furthermore, CPUs designated by rcu_nocbs= | ||||
| 	might well *always* have their RCU callbacks executed on some | ||||
| 	other CPUs; in fact, for some real-time workloads, this is the | ||||
| 	whole point of using the rcu_nocbs= kernel boot parameter. | ||||
| 
 | ||||
| 	In addition, do not assume that callbacks queued in a given order | ||||
| 	will be invoked in that order, even if they all are queued on the | ||||
|  | @ -444,7 +446,7 @@ over a rather long period of time, but improvements are always welcome! | |||
| 	real-time workloads than is synchronize_rcu_expedited(). | ||||
| 
 | ||||
| 	It is also permissible to sleep in RCU Tasks Trace read-side | ||||
| 	critical, which are delimited by rcu_read_lock_trace() and | ||||
| 	critical sections, which are delimited by rcu_read_lock_trace() and | ||||
| 	rcu_read_unlock_trace().  However, this is a specialized flavor | ||||
| 	of RCU, and you should not use it without first checking with | ||||
| 	its current users.  In most cases, you should instead use SRCU. | ||||
|  | @ -490,6 +492,12 @@ over a rather long period of time, but improvements are always welcome! | |||
| 		since the last time that you passed that same object to | ||||
| 		call_rcu() (or friends). | ||||
| 
 | ||||
| 	CONFIG_RCU_STRICT_GRACE_PERIOD: | ||||
| 		combine with KASAN to check for pointers leaked out | ||||
| 		of RCU read-side critical sections.  This Kconfig | ||||
| 		option is tough on both performance and scalability, | ||||
| 		and so is limited to four-CPU systems. | ||||
| 
 | ||||
| 	__rcu sparse checks: | ||||
| 		tag the pointer to the RCU-protected data structure | ||||
| 		with __rcu, and sparse will warn you if you access that | ||||
|  |  | |||
|  | @ -408,7 +408,10 @@ member of the rcu_dereference() to use in various situations: | |||
| 	RCU flavors, an RCU read-side critical section is entered | ||||
| 	using rcu_read_lock(), anything that disables bottom halves, | ||||
| 	anything that disables interrupts, or anything that disables | ||||
| 	preemption. | ||||
| 	preemption.  Please note that spinlock critical sections | ||||
| 	are also implied RCU read-side critical sections, even when | ||||
| 	they are preemptible, as they are in kernels built with | ||||
| 	CONFIG_PREEMPT_RT=y. | ||||
| 
 | ||||
| 2.	If the access might be within an RCU read-side critical section | ||||
| 	on the one hand, or protected by (say) my_lock on the other, | ||||
|  |  | |||
|  | @ -172,14 +172,25 @@ rcu_read_lock() | |||
| 	critical section.  Reference counts may be used in conjunction | ||||
| 	with RCU to maintain longer-term references to data structures. | ||||
| 
 | ||||
| 	Note that anything that disables bottom halves, preemption, | ||||
| 	or interrupts also enters an RCU read-side critical section. | ||||
| 	Acquiring a spinlock also enters an RCU read-side critical | ||||
| 	section, even for spinlocks that do not disable preemption, | ||||
| 	as is the case in kernels built with CONFIG_PREEMPT_RT=y. | ||||
| 	Sleeplocks do *not* enter RCU read-side critical sections. | ||||
| 
 | ||||
| rcu_read_unlock() | ||||
| ^^^^^^^^^^^^^^^^^ | ||||
| 	void rcu_read_unlock(void); | ||||
| 
 | ||||
| 	This temporal primitive is used by a reader to inform the | ||||
| 	reclaimer that the reader is exiting an RCU read-side critical | ||||
| 	section.  Note that RCU read-side critical sections may be nested | ||||
| 	and/or overlapping. | ||||
| 	section.  Anything that enables bottom halves, preemption, | ||||
| 	or interrupts also exits an RCU read-side critical section. | ||||
| 	Releasing a spinlock also exits an RCU read-side critical section. | ||||
| 
 | ||||
| 	Note that RCU read-side critical sections may be nested and/or | ||||
| 	overlapping. | ||||
| 
 | ||||
| synchronize_rcu() | ||||
| ^^^^^^^^^^^^^^^^^ | ||||
|  | @ -952,8 +963,8 @@ unfortunately any spinlock in a ``SLAB_TYPESAFE_BY_RCU`` object must be | |||
| initialized after each and every call to kmem_cache_alloc(), which renders | ||||
| reference-free spinlock acquisition completely unsafe.  Therefore, when | ||||
| using ``SLAB_TYPESAFE_BY_RCU``, make proper use of a reference counter. | ||||
| (Those willing to use a kmem_cache constructor may also use locking, | ||||
| including cache-friendly sequence locking.) | ||||
| (Those willing to initialize their locks in a kmem_cache constructor | ||||
| may also use locking, including cache-friendly sequence locking.) | ||||
| 
 | ||||
| With traditional reference counting -- such as that implemented by the | ||||
| kref library in Linux -- there is typically code that runs when the last | ||||
|  |  | |||
|  | @ -108,6 +108,7 @@ is applicable:: | |||
| 	CMA	Contiguous Memory Area support is enabled. | ||||
| 	DRM	Direct Rendering Management support is enabled. | ||||
| 	DYNAMIC_DEBUG Build in debug messages and enable them at runtime | ||||
| 	EARLY	Parameter processed too early to be embedded in initrd. | ||||
| 	EDD	BIOS Enhanced Disk Drive Services (EDD) is enabled | ||||
| 	EFI	EFI Partitioning (GPT) is enabled | ||||
| 	EVM	Extended Verification Module | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -37,7 +37,6 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) | |||
| } | ||||
| 
 | ||||
| extern void rcu_sync_init(struct rcu_sync *); | ||||
| extern void rcu_sync_enter_start(struct rcu_sync *); | ||||
| extern void rcu_sync_enter(struct rcu_sync *); | ||||
| extern void rcu_sync_exit(struct rcu_sync *); | ||||
| extern void rcu_sync_dtor(struct rcu_sync *); | ||||
|  |  | |||
|  | @ -184,9 +184,9 @@ void rcu_tasks_trace_qs_blkd(struct task_struct *t); | |||
| 	do {									\ | ||||
| 		int ___rttq_nesting = READ_ONCE((t)->trc_reader_nesting);	\ | ||||
| 										\ | ||||
| 		if (likely(!READ_ONCE((t)->trc_reader_special.b.need_qs)) &&	\ | ||||
| 		if (unlikely(READ_ONCE((t)->trc_reader_special.b.need_qs) == TRC_NEED_QS) &&	\ | ||||
| 		    likely(!___rttq_nesting)) {					\ | ||||
| 			rcu_trc_cmpxchg_need_qs((t), 0,	TRC_NEED_QS_CHECKED);	\ | ||||
| 			rcu_trc_cmpxchg_need_qs((t), TRC_NEED_QS, TRC_NEED_QS_CHECKED);	\ | ||||
| 		} else if (___rttq_nesting && ___rttq_nesting != INT_MIN &&	\ | ||||
| 			   !READ_ONCE((t)->trc_reader_special.b.blocked)) {	\ | ||||
| 			rcu_tasks_trace_qs_blkd(t);				\ | ||||
|  |  | |||
|  | @ -858,6 +858,8 @@ struct task_struct { | |||
| 	u8				rcu_tasks_idx; | ||||
| 	int				rcu_tasks_idle_cpu; | ||||
| 	struct list_head		rcu_tasks_holdout_list; | ||||
| 	int				rcu_tasks_exit_cpu; | ||||
| 	struct list_head		rcu_tasks_exit_list; | ||||
| #endif /* #ifdef CONFIG_TASKS_RCU */ | ||||
| 
 | ||||
| #ifdef CONFIG_TASKS_TRACE_RCU | ||||
|  |  | |||
|  | @ -147,6 +147,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = { | |||
| 	.rcu_tasks_holdout = false, | ||||
| 	.rcu_tasks_holdout_list = LIST_HEAD_INIT(init_task.rcu_tasks_holdout_list), | ||||
| 	.rcu_tasks_idle_cpu = -1, | ||||
| 	.rcu_tasks_exit_list = LIST_HEAD_INIT(init_task.rcu_tasks_exit_list), | ||||
| #endif | ||||
| #ifdef CONFIG_TASKS_TRACE_RCU | ||||
| 	.trc_reader_nesting = 0, | ||||
|  |  | |||
|  | @ -458,6 +458,8 @@ static __always_inline void context_tracking_recursion_exit(void) | |||
|  * __ct_user_enter - Inform the context tracking that the CPU is going | ||||
|  *		     to enter user or guest space mode. | ||||
|  * | ||||
|  * @state: userspace context-tracking state to enter. | ||||
|  * | ||||
|  * This function must be called right before we switch from the kernel | ||||
|  * to user or guest space, when it's guaranteed the remaining kernel | ||||
|  * instructions to execute won't use any RCU read side critical section | ||||
|  | @ -595,6 +597,8 @@ NOKPROBE_SYMBOL(user_enter_callable); | |||
|  * __ct_user_exit - Inform the context tracking that the CPU is | ||||
|  *		    exiting user or guest mode and entering the kernel. | ||||
|  * | ||||
|  * @state: userspace context-tracking state being exited from. | ||||
|  * | ||||
|  * This function must be called after we entered the kernel from user or | ||||
|  * guest space before any use of RCU read side critical section. This | ||||
|  * potentially includes any high level kernel code like syscalls, exceptions, | ||||
|  |  | |||
|  | @ -1978,6 +1978,7 @@ static inline void rcu_copy_process(struct task_struct *p) | |||
| 	p->rcu_tasks_holdout = false; | ||||
| 	INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); | ||||
| 	p->rcu_tasks_idle_cpu = -1; | ||||
| 	INIT_LIST_HEAD(&p->rcu_tasks_exit_list); | ||||
| #endif /* #ifdef CONFIG_TASKS_RCU */ | ||||
| #ifdef CONFIG_TASKS_TRACE_RCU | ||||
| 	p->trc_reader_nesting = 0; | ||||
|  |  | |||
|  | @ -314,6 +314,19 @@ config RCU_LAZY | |||
| 	  To save power, batch RCU callbacks and flush after delay, memory | ||||
| 	  pressure, or callback list growing too big. | ||||
| 
 | ||||
| 	  Requires rcu_nocbs=all to be set. | ||||
| 
 | ||||
| 	  Use rcutree.enable_rcu_lazy=0 to turn it off at boot time. | ||||
| 
 | ||||
| config RCU_LAZY_DEFAULT_OFF | ||||
| 	bool "Turn RCU lazy invocation off by default" | ||||
| 	depends on RCU_LAZY | ||||
| 	default n | ||||
| 	help | ||||
| 	  Allows building the kernel with CONFIG_RCU_LAZY=y yet keeping it off by | ||||
| 	  default. Boot time param rcutree.enable_rcu_lazy=1 can be used to switch | ||||
| 	  it back on. | ||||
| 
 | ||||
| config RCU_DOUBLE_CHECK_CB_TIME | ||||
| 	bool "RCU callback-batch backup time check" | ||||
| 	depends on RCU_EXPERT | ||||
|  |  | |||
|  | @ -528,6 +528,12 @@ struct task_struct *get_rcu_tasks_gp_kthread(void); | |||
| struct task_struct *get_rcu_tasks_rude_gp_kthread(void); | ||||
| #endif // # ifdef CONFIG_TASKS_RUDE_RCU
 | ||||
| 
 | ||||
| #ifdef CONFIG_TASKS_RCU_GENERIC | ||||
| void tasks_cblist_init_generic(void); | ||||
| #else /* #ifdef CONFIG_TASKS_RCU_GENERIC */ | ||||
| static inline void tasks_cblist_init_generic(void) { } | ||||
| #endif /* #else #ifdef CONFIG_TASKS_RCU_GENERIC */ | ||||
| 
 | ||||
| #define RCU_SCHEDULER_INACTIVE	0 | ||||
| #define RCU_SCHEDULER_INIT	1 | ||||
| #define RCU_SCHEDULER_RUNNING	2 | ||||
|  | @ -543,11 +549,11 @@ enum rcutorture_type { | |||
| }; | ||||
| 
 | ||||
| #if defined(CONFIG_RCU_LAZY) | ||||
| unsigned long rcu_lazy_get_jiffies_till_flush(void); | ||||
| void rcu_lazy_set_jiffies_till_flush(unsigned long j); | ||||
| unsigned long rcu_get_jiffies_lazy_flush(void); | ||||
| void rcu_set_jiffies_lazy_flush(unsigned long j); | ||||
| #else | ||||
| static inline unsigned long rcu_lazy_get_jiffies_till_flush(void) { return 0; } | ||||
| static inline void rcu_lazy_set_jiffies_till_flush(unsigned long j) { } | ||||
| static inline unsigned long rcu_get_jiffies_lazy_flush(void) { return 0; } | ||||
| static inline void rcu_set_jiffies_lazy_flush(unsigned long j) { } | ||||
| #endif | ||||
| 
 | ||||
| #if defined(CONFIG_TREE_RCU) | ||||
|  | @ -623,12 +629,7 @@ int rcu_get_gp_kthreads_prio(void); | |||
| void rcu_fwd_progress_check(unsigned long j); | ||||
| void rcu_force_quiescent_state(void); | ||||
| extern struct workqueue_struct *rcu_gp_wq; | ||||
| #ifdef CONFIG_RCU_EXP_KTHREAD | ||||
| extern struct kthread_worker *rcu_exp_gp_kworker; | ||||
| extern struct kthread_worker *rcu_exp_par_gp_kworker; | ||||
| #else /* !CONFIG_RCU_EXP_KTHREAD */ | ||||
| extern struct workqueue_struct *rcu_par_gp_wq; | ||||
| #endif /* CONFIG_RCU_EXP_KTHREAD */ | ||||
| void rcu_gp_slow_register(atomic_t *rgssp); | ||||
| void rcu_gp_slow_unregister(atomic_t *rgssp); | ||||
| #endif /* #else #ifdef CONFIG_TINY_RCU */ | ||||
|  |  | |||
|  | @ -764,9 +764,9 @@ kfree_scale_init(void) | |||
| 
 | ||||
| 	if (kfree_by_call_rcu) { | ||||
| 		/* do a test to check the timeout. */ | ||||
| 		orig_jif = rcu_lazy_get_jiffies_till_flush(); | ||||
| 		orig_jif = rcu_get_jiffies_lazy_flush(); | ||||
| 
 | ||||
| 		rcu_lazy_set_jiffies_till_flush(2 * HZ); | ||||
| 		rcu_set_jiffies_lazy_flush(2 * HZ); | ||||
| 		rcu_barrier(); | ||||
| 
 | ||||
| 		jif_start = jiffies; | ||||
|  | @ -775,7 +775,7 @@ kfree_scale_init(void) | |||
| 
 | ||||
| 		smp_cond_load_relaxed(&rcu_lazy_test1_cb_called, VAL == 1); | ||||
| 
 | ||||
| 		rcu_lazy_set_jiffies_till_flush(orig_jif); | ||||
| 		rcu_set_jiffies_lazy_flush(orig_jif); | ||||
| 
 | ||||
| 		if (WARN_ON_ONCE(jiffies_at_lazy_cb - jif_start < 2 * HZ)) { | ||||
| 			pr_alert("ERROR: call_rcu() CBs are not being lazy as expected!\n"); | ||||
|  |  | |||
|  | @ -1368,9 +1368,13 @@ rcu_torture_writer(void *arg) | |||
| 	struct rcu_torture *rp; | ||||
| 	struct rcu_torture *old_rp; | ||||
| 	static DEFINE_TORTURE_RANDOM(rand); | ||||
| 	unsigned long stallsdone = jiffies; | ||||
| 	bool stutter_waited; | ||||
| 	unsigned long ulo[NUM_ACTIVE_RCU_POLL_OLDSTATE]; | ||||
| 
 | ||||
| 	// If a new stall test is added, this must be adjusted.
 | ||||
| 	if (stall_cpu_holdoff + stall_gp_kthread + stall_cpu) | ||||
| 		stallsdone += (stall_cpu_holdoff + stall_gp_kthread + stall_cpu + 60) * HZ; | ||||
| 	VERBOSE_TOROUT_STRING("rcu_torture_writer task started"); | ||||
| 	if (!can_expedite) | ||||
| 		pr_alert("%s" TORTURE_FLAG | ||||
|  | @ -1576,11 +1580,11 @@ rcu_torture_writer(void *arg) | |||
| 		    !atomic_read(&rcu_fwd_cb_nodelay) && | ||||
| 		    !cur_ops->slow_gps && | ||||
| 		    !torture_must_stop() && | ||||
| 		    boot_ended) | ||||
| 		    boot_ended && | ||||
| 		    time_after(jiffies, stallsdone)) | ||||
| 			for (i = 0; i < ARRAY_SIZE(rcu_tortures); i++) | ||||
| 				if (list_empty(&rcu_tortures[i].rtort_free) && | ||||
| 				    rcu_access_pointer(rcu_torture_current) != | ||||
| 				    &rcu_tortures[i]) { | ||||
| 				    rcu_access_pointer(rcu_torture_current) != &rcu_tortures[i]) { | ||||
| 					tracing_off(); | ||||
| 					show_rcu_gp_kthreads(); | ||||
| 					WARN(1, "%s: rtort_pipe_count: %d\n", __func__, rcu_tortures[i].rtort_pipe_count); | ||||
|  | @ -2441,7 +2445,8 @@ static struct notifier_block rcu_torture_stall_block = { | |||
| 
 | ||||
| /*
 | ||||
|  * CPU-stall kthread.  It waits as specified by stall_cpu_holdoff, then | ||||
|  * induces a CPU stall for the time specified by stall_cpu. | ||||
|  * induces a CPU stall for the time specified by stall_cpu.  If a new | ||||
|  * stall test is added, stallsdone in rcu_torture_writer() must be adjusted. | ||||
|  */ | ||||
| static int rcu_torture_stall(void *args) | ||||
| { | ||||
|  |  | |||
|  | @ -1234,11 +1234,20 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, | |||
| 	if (rhp) | ||||
| 		rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp); | ||||
| 	/*
 | ||||
| 	 * The snapshot for acceleration must be taken _before_ the read of the | ||||
| 	 * current gp sequence used for advancing, otherwise advancing may fail | ||||
| 	 * and acceleration may then fail too. | ||||
| 	 * It's crucial to capture the snapshot 's' for acceleration before | ||||
| 	 * reading the current gp_seq that is used for advancing. This is | ||||
| 	 * essential because if the acceleration snapshot is taken after a | ||||
| 	 * failed advancement attempt, there's a risk that a grace period may | ||||
| 	 * conclude and a new one may start in the interim. If the snapshot is | ||||
| 	 * captured after this sequence of events, the acceleration snapshot 's' | ||||
| 	 * could be excessively advanced, leading to acceleration failure. | ||||
| 	 * In such a scenario, an 'acceleration leak' can occur, where new | ||||
| 	 * callbacks become indefinitely stuck in the RCU_NEXT_TAIL segment. | ||||
| 	 * Also note that encountering advancing failures is a normal | ||||
| 	 * occurrence when the grace period for RCU_WAIT_TAIL is in progress. | ||||
| 	 * | ||||
| 	 * This could happen if: | ||||
| 	 * To see this, consider the following events which occur if | ||||
| 	 * rcu_seq_snap() were to be called after advance: | ||||
| 	 * | ||||
| 	 *  1) The RCU_WAIT_TAIL segment has callbacks (gp_num = X + 4) and the | ||||
| 	 *     RCU_NEXT_READY_TAIL also has callbacks (gp_num = X + 8). | ||||
|  | @ -1264,6 +1273,13 @@ static unsigned long srcu_gp_start_if_needed(struct srcu_struct *ssp, | |||
| 	if (rhp) { | ||||
| 		rcu_segcblist_advance(&sdp->srcu_cblist, | ||||
| 				      rcu_seq_current(&ssp->srcu_sup->srcu_gp_seq)); | ||||
| 		/*
 | ||||
| 		 * Acceleration can never fail because the base current gp_seq | ||||
| 		 * used for acceleration is <= the value of gp_seq used for | ||||
| 		 * advancing. This means that RCU_NEXT_TAIL segment will | ||||
| 		 * always be able to be emptied by the acceleration into the | ||||
| 		 * RCU_NEXT_READY_TAIL or RCU_WAIT_TAIL segments. | ||||
| 		 */ | ||||
| 		WARN_ON_ONCE(!rcu_segcblist_accelerate(&sdp->srcu_cblist, s)); | ||||
| 	} | ||||
| 	if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { | ||||
|  |  | |||
|  | @ -24,22 +24,6 @@ void rcu_sync_init(struct rcu_sync *rsp) | |||
| 	init_waitqueue_head(&rsp->gp_wait); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * rcu_sync_enter_start - Force readers onto slow path for multiple updates | ||||
|  * @rsp: Pointer to rcu_sync structure to use for synchronization | ||||
|  * | ||||
|  * Must be called after rcu_sync_init() and before first use. | ||||
|  * | ||||
|  * Ensures rcu_sync_is_idle() returns false and rcu_sync_{enter,exit}() | ||||
|  * pairs turn into NO-OPs. | ||||
|  */ | ||||
| void rcu_sync_enter_start(struct rcu_sync *rsp) | ||||
| { | ||||
| 	rsp->gp_count++; | ||||
| 	rsp->gp_state = GP_PASSED; | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| static void rcu_sync_func(struct rcu_head *rhp); | ||||
| 
 | ||||
| static void rcu_sync_call(struct rcu_sync *rsp) | ||||
|  |  | |||
|  | @ -32,6 +32,7 @@ typedef void (*postgp_func_t)(struct rcu_tasks *rtp); | |||
|  * @rtp_irq_work: IRQ work queue for deferred wakeups. | ||||
|  * @barrier_q_head: RCU callback for barrier operation. | ||||
|  * @rtp_blkd_tasks: List of tasks blocked as readers. | ||||
|  * @rtp_exit_list: List of tasks in the latter portion of do_exit(). | ||||
|  * @cpu: CPU number corresponding to this entry. | ||||
|  * @rtpp: Pointer to the rcu_tasks structure. | ||||
|  */ | ||||
|  | @ -46,6 +47,7 @@ struct rcu_tasks_percpu { | |||
| 	struct irq_work rtp_irq_work; | ||||
| 	struct rcu_head barrier_q_head; | ||||
| 	struct list_head rtp_blkd_tasks; | ||||
| 	struct list_head rtp_exit_list; | ||||
| 	int cpu; | ||||
| 	struct rcu_tasks *rtpp; | ||||
| }; | ||||
|  | @ -144,8 +146,6 @@ static struct rcu_tasks rt_name =							\ | |||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_TASKS_RCU | ||||
| /* Track exiting tasks in order to allow them to be waited for. */ | ||||
| DEFINE_STATIC_SRCU(tasks_rcu_exit_srcu); | ||||
| 
 | ||||
| /* Report delay in synchronize_srcu() completion in rcu_tasks_postscan(). */ | ||||
| static void tasks_rcu_exit_srcu_stall(struct timer_list *unused); | ||||
|  | @ -240,7 +240,6 @@ static const char *tasks_gp_state_getname(struct rcu_tasks *rtp) | |||
| static void cblist_init_generic(struct rcu_tasks *rtp) | ||||
| { | ||||
| 	int cpu; | ||||
| 	unsigned long flags; | ||||
| 	int lim; | ||||
| 	int shift; | ||||
| 
 | ||||
|  | @ -266,15 +265,15 @@ static void cblist_init_generic(struct rcu_tasks *rtp) | |||
| 		WARN_ON_ONCE(!rtpcp); | ||||
| 		if (cpu) | ||||
| 			raw_spin_lock_init(&ACCESS_PRIVATE(rtpcp, lock)); | ||||
| 		local_irq_save(flags);  // serialize initialization
 | ||||
| 		if (rcu_segcblist_empty(&rtpcp->cblist)) | ||||
| 			rcu_segcblist_init(&rtpcp->cblist); | ||||
| 		local_irq_restore(flags); | ||||
| 		INIT_WORK(&rtpcp->rtp_work, rcu_tasks_invoke_cbs_wq); | ||||
| 		rtpcp->cpu = cpu; | ||||
| 		rtpcp->rtpp = rtp; | ||||
| 		if (!rtpcp->rtp_blkd_tasks.next) | ||||
| 			INIT_LIST_HEAD(&rtpcp->rtp_blkd_tasks); | ||||
| 		if (!rtpcp->rtp_exit_list.next) | ||||
| 			INIT_LIST_HEAD(&rtpcp->rtp_exit_list); | ||||
| 	} | ||||
| 
 | ||||
| 	pr_info("%s: Setting shift to %d and lim to %d rcu_task_cb_adjust=%d.\n", rtp->name, | ||||
|  | @ -851,10 +850,12 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) | |||
| //	number of voluntary context switches, and add that task to the
 | ||||
| //	holdout list.
 | ||||
| // rcu_tasks_postscan():
 | ||||
| //	Invoke synchronize_srcu() to ensure that all tasks that were
 | ||||
| //	in the process of exiting (and which thus might not know to
 | ||||
| //	synchronize with this RCU Tasks grace period) have completed
 | ||||
| //	exiting.
 | ||||
| //	Gather per-CPU lists of tasks in do_exit() to ensure that all
 | ||||
| //	tasks that were in the process of exiting (and which thus might
 | ||||
| //	not know to synchronize with this RCU Tasks grace period) have
 | ||||
| //	completed exiting.  The synchronize_rcu() in rcu_tasks_postgp()
 | ||||
| //	will take care of any tasks stuck in the non-preemptible region
 | ||||
| //	of do_exit() following its call to exit_tasks_rcu_stop().
 | ||||
| // check_all_holdout_tasks(), repeatedly until holdout list is empty:
 | ||||
| //	Scans the holdout list, attempting to identify a quiescent state
 | ||||
| //	for each task on the list.  If there is a quiescent state, the
 | ||||
|  | @ -867,8 +868,10 @@ static void rcu_tasks_wait_gp(struct rcu_tasks *rtp) | |||
| //	with interrupts disabled.
 | ||||
| //
 | ||||
| // For each exiting task, the exit_tasks_rcu_start() and
 | ||||
| // exit_tasks_rcu_finish() functions begin and end, respectively, the SRCU
 | ||||
| // read-side critical sections waited for by rcu_tasks_postscan().
 | ||||
| // exit_tasks_rcu_finish() functions add and remove, respectively, the
 | ||||
| // current task to a per-CPU list of tasks that rcu_tasks_postscan() must
 | ||||
| // wait on.  This is necessary because rcu_tasks_postscan() must wait on
 | ||||
| // tasks that have already been removed from the global list of tasks.
 | ||||
| //
 | ||||
| // Pre-grace-period update-side code is ordered before the grace
 | ||||
| // via the raw_spin_lock.*rcu_node().  Pre-grace-period read-side code
 | ||||
|  | @ -932,9 +935,13 @@ static void rcu_tasks_pertask(struct task_struct *t, struct list_head *hop) | |||
| 	} | ||||
| } | ||||
| 
 | ||||
| void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func); | ||||
| DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks"); | ||||
| 
 | ||||
| /* Processing between scanning tasklist and draining the holdout list. */ | ||||
| static void rcu_tasks_postscan(struct list_head *hop) | ||||
| { | ||||
| 	int cpu; | ||||
| 	int rtsi = READ_ONCE(rcu_task_stall_info); | ||||
| 
 | ||||
| 	if (!IS_ENABLED(CONFIG_TINY_RCU)) { | ||||
|  | @ -948,9 +955,9 @@ static void rcu_tasks_postscan(struct list_head *hop) | |||
| 	 * this, divide the fragile exit path part in two intersecting | ||||
| 	 * read side critical sections: | ||||
| 	 * | ||||
| 	 * 1) An _SRCU_ read side starting before calling exit_notify(), | ||||
| 	 *    which may remove the task from the tasklist, and ending after | ||||
| 	 *    the final preempt_disable() call in do_exit(). | ||||
| 	 * 1) A task_struct list addition before calling exit_notify(), | ||||
| 	 *    which may remove the task from the tasklist, with the | ||||
| 	 *    removal after the final preempt_disable() call in do_exit(). | ||||
| 	 * | ||||
| 	 * 2) An _RCU_ read side starting with the final preempt_disable() | ||||
| 	 *    call in do_exit() and ending with the final call to schedule() | ||||
|  | @ -959,7 +966,37 @@ static void rcu_tasks_postscan(struct list_head *hop) | |||
| 	 * This handles the part 1). And postgp will handle part 2) with a | ||||
| 	 * call to synchronize_rcu(). | ||||
| 	 */ | ||||
| 	synchronize_srcu(&tasks_rcu_exit_srcu); | ||||
| 
 | ||||
| 	for_each_possible_cpu(cpu) { | ||||
| 		unsigned long j = jiffies + 1; | ||||
| 		struct rcu_tasks_percpu *rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, cpu); | ||||
| 		struct task_struct *t; | ||||
| 		struct task_struct *t1; | ||||
| 		struct list_head tmp; | ||||
| 
 | ||||
| 		raw_spin_lock_irq_rcu_node(rtpcp); | ||||
| 		list_for_each_entry_safe(t, t1, &rtpcp->rtp_exit_list, rcu_tasks_exit_list) { | ||||
| 			if (list_empty(&t->rcu_tasks_holdout_list)) | ||||
| 				rcu_tasks_pertask(t, hop); | ||||
| 
 | ||||
| 			// RT kernels need frequent pauses, otherwise
 | ||||
| 			// pause at least once per pair of jiffies.
 | ||||
| 			if (!IS_ENABLED(CONFIG_PREEMPT_RT) && time_before(jiffies, j)) | ||||
| 				continue; | ||||
| 
 | ||||
| 			// Keep our place in the list while pausing.
 | ||||
| 			// Nothing else traverses this list, so adding a
 | ||||
| 			// bare list_head is OK.
 | ||||
| 			list_add(&tmp, &t->rcu_tasks_exit_list); | ||||
| 			raw_spin_unlock_irq_rcu_node(rtpcp); | ||||
| 			cond_resched(); // For CONFIG_PREEMPT=n kernels
 | ||||
| 			raw_spin_lock_irq_rcu_node(rtpcp); | ||||
| 			t1 = list_entry(tmp.next, struct task_struct, rcu_tasks_exit_list); | ||||
| 			list_del(&tmp); | ||||
| 			j = jiffies + 1; | ||||
| 		} | ||||
| 		raw_spin_unlock_irq_rcu_node(rtpcp); | ||||
| 	} | ||||
| 
 | ||||
| 	if (!IS_ENABLED(CONFIG_TINY_RCU)) | ||||
| 		del_timer_sync(&tasks_rcu_exit_srcu_stall_timer); | ||||
|  | @ -1027,7 +1064,6 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp) | |||
| 	 * | ||||
| 	 * In addition, this synchronize_rcu() waits for exiting tasks | ||||
| 	 * to complete their final preempt_disable() region of execution, | ||||
| 	 * cleaning up after synchronize_srcu(&tasks_rcu_exit_srcu), | ||||
| 	 * enforcing the whole region before tasklist removal until | ||||
| 	 * the final schedule() with TASK_DEAD state to be an RCU TASKS | ||||
| 	 * read side critical section. | ||||
|  | @ -1035,9 +1071,6 @@ static void rcu_tasks_postgp(struct rcu_tasks *rtp) | |||
| 	synchronize_rcu(); | ||||
| } | ||||
| 
 | ||||
| void call_rcu_tasks(struct rcu_head *rhp, rcu_callback_t func); | ||||
| DEFINE_RCU_TASKS(rcu_tasks, rcu_tasks_wait_gp, call_rcu_tasks, "RCU Tasks"); | ||||
| 
 | ||||
| static void tasks_rcu_exit_srcu_stall(struct timer_list *unused) | ||||
| { | ||||
| #ifndef CONFIG_TINY_RCU | ||||
|  | @ -1118,7 +1151,6 @@ module_param(rcu_tasks_lazy_ms, int, 0444); | |||
| 
 | ||||
| static int __init rcu_spawn_tasks_kthread(void) | ||||
| { | ||||
| 	cblist_init_generic(&rcu_tasks); | ||||
| 	rcu_tasks.gp_sleep = HZ / 10; | ||||
| 	rcu_tasks.init_fract = HZ / 10; | ||||
| 	if (rcu_tasks_lazy_ms >= 0) | ||||
|  | @ -1147,25 +1179,48 @@ struct task_struct *get_rcu_tasks_gp_kthread(void) | |||
| EXPORT_SYMBOL_GPL(get_rcu_tasks_gp_kthread); | ||||
| 
 | ||||
| /*
 | ||||
|  * Contribute to protect against tasklist scan blind spot while the | ||||
|  * task is exiting and may be removed from the tasklist. See | ||||
|  * corresponding synchronize_srcu() for further details. | ||||
|  * Protect against tasklist scan blind spot while the task is exiting and | ||||
|  * may be removed from the tasklist.  Do this by adding the task to yet | ||||
|  * another list. | ||||
|  * | ||||
|  * Note that the task will remove itself from this list, so there is no | ||||
|  * need for get_task_struct(), except in the case where rcu_tasks_pertask() | ||||
|  * adds it to the holdout list, in which case rcu_tasks_pertask() supplies | ||||
|  * the needed get_task_struct(). | ||||
|  */ | ||||
| void exit_tasks_rcu_start(void) __acquires(&tasks_rcu_exit_srcu) | ||||
| void exit_tasks_rcu_start(void) | ||||
| { | ||||
| 	current->rcu_tasks_idx = __srcu_read_lock(&tasks_rcu_exit_srcu); | ||||
| 	unsigned long flags; | ||||
| 	struct rcu_tasks_percpu *rtpcp; | ||||
| 	struct task_struct *t = current; | ||||
| 
 | ||||
| 	WARN_ON_ONCE(!list_empty(&t->rcu_tasks_exit_list)); | ||||
| 	preempt_disable(); | ||||
| 	rtpcp = this_cpu_ptr(rcu_tasks.rtpcpu); | ||||
| 	t->rcu_tasks_exit_cpu = smp_processor_id(); | ||||
| 	raw_spin_lock_irqsave_rcu_node(rtpcp, flags); | ||||
| 	if (!rtpcp->rtp_exit_list.next) | ||||
| 		INIT_LIST_HEAD(&rtpcp->rtp_exit_list); | ||||
| 	list_add(&t->rcu_tasks_exit_list, &rtpcp->rtp_exit_list); | ||||
| 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); | ||||
| 	preempt_enable(); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Contribute to protect against tasklist scan blind spot while the | ||||
|  * task is exiting and may be removed from the tasklist. See | ||||
|  * corresponding synchronize_srcu() for further details. | ||||
|  * Remove the task from the "yet another list" because do_exit() is now | ||||
|  * non-preemptible, allowing synchronize_rcu() to wait beyond this point. | ||||
|  */ | ||||
| void exit_tasks_rcu_stop(void) __releases(&tasks_rcu_exit_srcu) | ||||
| void exit_tasks_rcu_stop(void) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 	struct rcu_tasks_percpu *rtpcp; | ||||
| 	struct task_struct *t = current; | ||||
| 
 | ||||
| 	__srcu_read_unlock(&tasks_rcu_exit_srcu, t->rcu_tasks_idx); | ||||
| 	WARN_ON_ONCE(list_empty(&t->rcu_tasks_exit_list)); | ||||
| 	rtpcp = per_cpu_ptr(rcu_tasks.rtpcpu, t->rcu_tasks_exit_cpu); | ||||
| 	raw_spin_lock_irqsave_rcu_node(rtpcp, flags); | ||||
| 	list_del_init(&t->rcu_tasks_exit_list); | ||||
| 	raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -1282,7 +1337,6 @@ module_param(rcu_tasks_rude_lazy_ms, int, 0444); | |||
| 
 | ||||
| static int __init rcu_spawn_tasks_rude_kthread(void) | ||||
| { | ||||
| 	cblist_init_generic(&rcu_tasks_rude); | ||||
| 	rcu_tasks_rude.gp_sleep = HZ / 10; | ||||
| 	if (rcu_tasks_rude_lazy_ms >= 0) | ||||
| 		rcu_tasks_rude.lazy_jiffies = msecs_to_jiffies(rcu_tasks_rude_lazy_ms); | ||||
|  | @ -1914,7 +1968,6 @@ module_param(rcu_tasks_trace_lazy_ms, int, 0444); | |||
| 
 | ||||
| static int __init rcu_spawn_tasks_trace_kthread(void) | ||||
| { | ||||
| 	cblist_init_generic(&rcu_tasks_trace); | ||||
| 	if (IS_ENABLED(CONFIG_TASKS_TRACE_RCU_READ_MB)) { | ||||
| 		rcu_tasks_trace.gp_sleep = HZ / 10; | ||||
| 		rcu_tasks_trace.init_fract = HZ / 10; | ||||
|  | @ -2086,6 +2139,24 @@ late_initcall(rcu_tasks_verify_schedule_work); | |||
| static void rcu_tasks_initiate_self_tests(void) { } | ||||
| #endif /* #else #ifdef CONFIG_PROVE_RCU */ | ||||
| 
 | ||||
| void __init tasks_cblist_init_generic(void) | ||||
| { | ||||
| 	lockdep_assert_irqs_disabled(); | ||||
| 	WARN_ON(num_online_cpus() > 1); | ||||
| 
 | ||||
| #ifdef CONFIG_TASKS_RCU | ||||
| 	cblist_init_generic(&rcu_tasks); | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_TASKS_RUDE_RCU | ||||
| 	cblist_init_generic(&rcu_tasks_rude); | ||||
| #endif | ||||
| 
 | ||||
| #ifdef CONFIG_TASKS_TRACE_RCU | ||||
| 	cblist_init_generic(&rcu_tasks_trace); | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| void __init rcu_init_tasks_generic(void) | ||||
| { | ||||
| #ifdef CONFIG_TASKS_RCU | ||||
|  |  | |||
|  | @ -261,4 +261,5 @@ void __init rcu_init(void) | |||
| { | ||||
| 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | ||||
| 	rcu_early_boot_tests(); | ||||
| 	tasks_cblist_init_generic(); | ||||
| } | ||||
|  |  | |||
|  | @ -145,7 +145,7 @@ static int rcu_scheduler_fully_active __read_mostly; | |||
| 
 | ||||
| static void rcu_report_qs_rnp(unsigned long mask, struct rcu_node *rnp, | ||||
| 			      unsigned long gps, unsigned long flags); | ||||
| static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu); | ||||
| static struct task_struct *rcu_boost_task(struct rcu_node *rnp); | ||||
| static void invoke_rcu_core(void); | ||||
| static void rcu_report_exp_rdp(struct rcu_data *rdp); | ||||
| static void sync_sched_exp_online_cleanup(int cpu); | ||||
|  | @ -2145,6 +2145,12 @@ static void rcu_do_batch(struct rcu_data *rdp) | |||
| 	 * Extract the list of ready callbacks, disabling IRQs to prevent | ||||
| 	 * races with call_rcu() from interrupt handlers.  Leave the | ||||
| 	 * callback counts, as rcu_barrier() needs to be conservative. | ||||
| 	 * | ||||
| 	 * Callbacks execution is fully ordered against preceding grace period | ||||
| 	 * completion (materialized by rnp->gp_seq update) thanks to the | ||||
| 	 * smp_mb__after_unlock_lock() upon node locking required for callbacks | ||||
| 	 * advancing. In NOCB mode this ordering is then further relayed through | ||||
| 	 * the nocb locking that protects both callbacks advancing and extraction. | ||||
| 	 */ | ||||
| 	rcu_nocb_lock_irqsave(rdp, flags); | ||||
| 	WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); | ||||
|  | @ -2591,12 +2597,26 @@ static int __init rcu_spawn_core_kthreads(void) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static void rcutree_enqueue(struct rcu_data *rdp, struct rcu_head *head, rcu_callback_t func) | ||||
| { | ||||
| 	rcu_segcblist_enqueue(&rdp->cblist, head); | ||||
| 	if (__is_kvfree_rcu_offset((unsigned long)func)) | ||||
| 		trace_rcu_kvfree_callback(rcu_state.name, head, | ||||
| 					 (unsigned long)func, | ||||
| 					 rcu_segcblist_n_cbs(&rdp->cblist)); | ||||
| 	else | ||||
| 		trace_rcu_callback(rcu_state.name, head, | ||||
| 				   rcu_segcblist_n_cbs(&rdp->cblist)); | ||||
| 	trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued")); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Handle any core-RCU processing required by a call_rcu() invocation. | ||||
|  */ | ||||
| static void __call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, | ||||
| 			    unsigned long flags) | ||||
| static void call_rcu_core(struct rcu_data *rdp, struct rcu_head *head, | ||||
| 			  rcu_callback_t func, unsigned long flags) | ||||
| { | ||||
| 	rcutree_enqueue(rdp, head, func); | ||||
| 	/*
 | ||||
| 	 * If called from an extended quiescent state, invoke the RCU | ||||
| 	 * core in order to force a re-evaluation of RCU's idleness. | ||||
|  | @ -2692,7 +2712,6 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) | |||
| 	unsigned long flags; | ||||
| 	bool lazy; | ||||
| 	struct rcu_data *rdp; | ||||
| 	bool was_alldone; | ||||
| 
 | ||||
| 	/* Misaligned rcu_head! */ | ||||
| 	WARN_ON_ONCE((unsigned long)head & (sizeof(void *) - 1)); | ||||
|  | @ -2729,30 +2748,18 @@ __call_rcu_common(struct rcu_head *head, rcu_callback_t func, bool lazy_in) | |||
| 	} | ||||
| 
 | ||||
| 	check_cb_ovld(rdp); | ||||
| 	if (rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) | ||||
| 		return; // Enqueued onto ->nocb_bypass, so just leave.
 | ||||
| 	// If no-CBs CPU gets here, rcu_nocb_try_bypass() acquired ->nocb_lock.
 | ||||
| 	rcu_segcblist_enqueue(&rdp->cblist, head); | ||||
| 	if (__is_kvfree_rcu_offset((unsigned long)func)) | ||||
| 		trace_rcu_kvfree_callback(rcu_state.name, head, | ||||
| 					 (unsigned long)func, | ||||
| 					 rcu_segcblist_n_cbs(&rdp->cblist)); | ||||
| 
 | ||||
| 	if (unlikely(rcu_rdp_is_offloaded(rdp))) | ||||
| 		call_rcu_nocb(rdp, head, func, flags, lazy); | ||||
| 	else | ||||
| 		trace_rcu_callback(rcu_state.name, head, | ||||
| 				   rcu_segcblist_n_cbs(&rdp->cblist)); | ||||
| 
 | ||||
| 	trace_rcu_segcb_stats(&rdp->cblist, TPS("SegCBQueued")); | ||||
| 
 | ||||
| 	/* Go handle any RCU core processing required. */ | ||||
| 	if (unlikely(rcu_rdp_is_offloaded(rdp))) { | ||||
| 		__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */ | ||||
| 	} else { | ||||
| 		__call_rcu_core(rdp, head, flags); | ||||
| 		local_irq_restore(flags); | ||||
| 	} | ||||
| 		call_rcu_core(rdp, head, func, flags); | ||||
| 	local_irq_restore(flags); | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_LAZY | ||||
| static bool enable_rcu_lazy __read_mostly = !IS_ENABLED(CONFIG_RCU_LAZY_DEFAULT_OFF); | ||||
| module_param(enable_rcu_lazy, bool, 0444); | ||||
| 
 | ||||
| /**
 | ||||
|  * call_rcu_hurry() - Queue RCU callback for invocation after grace period, and | ||||
|  * flush all lazy callbacks (including the new one) to the main ->cblist while | ||||
|  | @ -2778,6 +2785,8 @@ void call_rcu_hurry(struct rcu_head *head, rcu_callback_t func) | |||
| 	__call_rcu_common(head, func, false); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(call_rcu_hurry); | ||||
| #else | ||||
| #define enable_rcu_lazy		false | ||||
| #endif | ||||
| 
 | ||||
| /**
 | ||||
|  | @ -2826,7 +2835,7 @@ EXPORT_SYMBOL_GPL(call_rcu_hurry); | |||
|  */ | ||||
| void call_rcu(struct rcu_head *head, rcu_callback_t func) | ||||
| { | ||||
| 	__call_rcu_common(head, func, IS_ENABLED(CONFIG_RCU_LAZY)); | ||||
| 	__call_rcu_common(head, func, enable_rcu_lazy); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(call_rcu); | ||||
| 
 | ||||
|  | @ -4394,6 +4403,66 @@ rcu_boot_init_percpu_data(int cpu) | |||
| 	rcu_boot_init_nocb_percpu_data(rdp); | ||||
| } | ||||
| 
 | ||||
| struct kthread_worker *rcu_exp_gp_kworker; | ||||
| 
 | ||||
| static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp) | ||||
| { | ||||
| 	struct kthread_worker *kworker; | ||||
| 	const char *name = "rcu_exp_par_gp_kthread_worker/%d"; | ||||
| 	struct sched_param param = { .sched_priority = kthread_prio }; | ||||
| 	int rnp_index = rnp - rcu_get_root(); | ||||
| 
 | ||||
| 	if (rnp->exp_kworker) | ||||
| 		return; | ||||
| 
 | ||||
| 	kworker = kthread_create_worker(0, name, rnp_index); | ||||
| 	if (IS_ERR_OR_NULL(kworker)) { | ||||
| 		pr_err("Failed to create par gp kworker on %d/%d\n", | ||||
| 		       rnp->grplo, rnp->grphi); | ||||
| 		return; | ||||
| 	} | ||||
| 	WRITE_ONCE(rnp->exp_kworker, kworker); | ||||
| 
 | ||||
| 	if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD)) | ||||
| 		sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, &param); | ||||
| } | ||||
| 
 | ||||
| static struct task_struct *rcu_exp_par_gp_task(struct rcu_node *rnp) | ||||
| { | ||||
| 	struct kthread_worker *kworker = READ_ONCE(rnp->exp_kworker); | ||||
| 
 | ||||
| 	if (!kworker) | ||||
| 		return NULL; | ||||
| 
 | ||||
| 	return kworker->task; | ||||
| } | ||||
| 
 | ||||
| static void __init rcu_start_exp_gp_kworker(void) | ||||
| { | ||||
| 	const char *name = "rcu_exp_gp_kthread_worker"; | ||||
| 	struct sched_param param = { .sched_priority = kthread_prio }; | ||||
| 
 | ||||
| 	rcu_exp_gp_kworker = kthread_create_worker(0, name); | ||||
| 	if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { | ||||
| 		pr_err("Failed to create %s!\n", name); | ||||
| 		rcu_exp_gp_kworker = NULL; | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD)) | ||||
| 		sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param); | ||||
| } | ||||
| 
 | ||||
| static void rcu_spawn_rnp_kthreads(struct rcu_node *rnp) | ||||
| { | ||||
| 	if (rcu_scheduler_fully_active) { | ||||
| 		mutex_lock(&rnp->kthread_mutex); | ||||
| 		rcu_spawn_one_boost_kthread(rnp); | ||||
| 		rcu_spawn_exp_par_gp_kworker(rnp); | ||||
| 		mutex_unlock(&rnp->kthread_mutex); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Invoked early in the CPU-online process, when pretty much all services | ||||
|  * are available.  The incoming CPU is not present. | ||||
|  | @ -4442,7 +4511,7 @@ int rcutree_prepare_cpu(unsigned int cpu) | |||
| 	rdp->rcu_iw_gp_seq = rdp->gp_seq - 1; | ||||
| 	trace_rcu_grace_period(rcu_state.name, rdp->gp_seq, TPS("cpuonl")); | ||||
| 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||||
| 	rcu_spawn_one_boost_kthread(rnp); | ||||
| 	rcu_spawn_rnp_kthreads(rnp); | ||||
| 	rcu_spawn_cpu_nocb_kthread(cpu); | ||||
| 	WRITE_ONCE(rcu_state.n_online_cpus, rcu_state.n_online_cpus + 1); | ||||
| 
 | ||||
|  | @ -4450,13 +4519,64 @@ int rcutree_prepare_cpu(unsigned int cpu) | |||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Update RCU priority boot kthread affinity for CPU-hotplug changes. | ||||
|  * Update kthreads affinity during CPU-hotplug changes. | ||||
|  * | ||||
|  * Set the per-rcu_node kthread's affinity to cover all CPUs that are | ||||
|  * served by the rcu_node in question.  The CPU hotplug lock is still | ||||
|  * held, so the value of rnp->qsmaskinit will be stable. | ||||
|  * | ||||
|  * We don't include outgoingcpu in the affinity set, use -1 if there is | ||||
|  * no outgoing CPU.  If there are no CPUs left in the affinity set, | ||||
|  * this function allows the kthread to execute on any CPU. | ||||
|  * | ||||
|  * Any future concurrent calls are serialized via ->kthread_mutex. | ||||
|  */ | ||||
| static void rcutree_affinity_setting(unsigned int cpu, int outgoing) | ||||
| static void rcutree_affinity_setting(unsigned int cpu, int outgoingcpu) | ||||
| { | ||||
| 	struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); | ||||
| 	cpumask_var_t cm; | ||||
| 	unsigned long mask; | ||||
| 	struct rcu_data *rdp; | ||||
| 	struct rcu_node *rnp; | ||||
| 	struct task_struct *task_boost, *task_exp; | ||||
| 
 | ||||
| 	rcu_boost_kthread_setaffinity(rdp->mynode, outgoing); | ||||
| 	rdp = per_cpu_ptr(&rcu_data, cpu); | ||||
| 	rnp = rdp->mynode; | ||||
| 
 | ||||
| 	task_boost = rcu_boost_task(rnp); | ||||
| 	task_exp = rcu_exp_par_gp_task(rnp); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If CPU is the boot one, those tasks are created later from early | ||||
| 	 * initcall since kthreadd must be created first. | ||||
| 	 */ | ||||
| 	if (!task_boost && !task_exp) | ||||
| 		return; | ||||
| 
 | ||||
| 	if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) | ||||
| 		return; | ||||
| 
 | ||||
| 	mutex_lock(&rnp->kthread_mutex); | ||||
| 	mask = rcu_rnp_online_cpus(rnp); | ||||
| 	for_each_leaf_node_possible_cpu(rnp, cpu) | ||||
| 		if ((mask & leaf_node_cpu_bit(rnp, cpu)) && | ||||
| 		    cpu != outgoingcpu) | ||||
| 			cpumask_set_cpu(cpu, cm); | ||||
| 	cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU)); | ||||
| 	if (cpumask_empty(cm)) { | ||||
| 		cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU)); | ||||
| 		if (outgoingcpu >= 0) | ||||
| 			cpumask_clear_cpu(outgoingcpu, cm); | ||||
| 	} | ||||
| 
 | ||||
| 	if (task_exp) | ||||
| 		set_cpus_allowed_ptr(task_exp, cm); | ||||
| 
 | ||||
| 	if (task_boost) | ||||
| 		set_cpus_allowed_ptr(task_boost, cm); | ||||
| 
 | ||||
| 	mutex_unlock(&rnp->kthread_mutex); | ||||
| 
 | ||||
| 	free_cpumask_var(cm); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -4640,8 +4760,9 @@ void rcutree_migrate_callbacks(int cpu) | |||
| 		__call_rcu_nocb_wake(my_rdp, true, flags); | ||||
| 	} else { | ||||
| 		rcu_nocb_unlock(my_rdp); /* irqs remain disabled. */ | ||||
| 		raw_spin_unlock_irqrestore_rcu_node(my_rnp, flags); | ||||
| 		raw_spin_unlock_rcu_node(my_rnp); /* irqs remain disabled. */ | ||||
| 	} | ||||
| 	local_irq_restore(flags); | ||||
| 	if (needwake) | ||||
| 		rcu_gp_kthread_wake(); | ||||
| 	lockdep_assert_irqs_enabled(); | ||||
|  | @ -4730,51 +4851,6 @@ static int rcu_pm_notify(struct notifier_block *self, | |||
| 	return NOTIFY_OK; | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_EXP_KTHREAD | ||||
| struct kthread_worker *rcu_exp_gp_kworker; | ||||
| struct kthread_worker *rcu_exp_par_gp_kworker; | ||||
| 
 | ||||
| static void __init rcu_start_exp_gp_kworkers(void) | ||||
| { | ||||
| 	const char *par_gp_kworker_name = "rcu_exp_par_gp_kthread_worker"; | ||||
| 	const char *gp_kworker_name = "rcu_exp_gp_kthread_worker"; | ||||
| 	struct sched_param param = { .sched_priority = kthread_prio }; | ||||
| 
 | ||||
| 	rcu_exp_gp_kworker = kthread_create_worker(0, gp_kworker_name); | ||||
| 	if (IS_ERR_OR_NULL(rcu_exp_gp_kworker)) { | ||||
| 		pr_err("Failed to create %s!\n", gp_kworker_name); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	rcu_exp_par_gp_kworker = kthread_create_worker(0, par_gp_kworker_name); | ||||
| 	if (IS_ERR_OR_NULL(rcu_exp_par_gp_kworker)) { | ||||
| 		pr_err("Failed to create %s!\n", par_gp_kworker_name); | ||||
| 		kthread_destroy_worker(rcu_exp_gp_kworker); | ||||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	sched_setscheduler_nocheck(rcu_exp_gp_kworker->task, SCHED_FIFO, &param); | ||||
| 	sched_setscheduler_nocheck(rcu_exp_par_gp_kworker->task, SCHED_FIFO, | ||||
| 				   &param); | ||||
| } | ||||
| 
 | ||||
| static inline void rcu_alloc_par_gp_wq(void) | ||||
| { | ||||
| } | ||||
| #else /* !CONFIG_RCU_EXP_KTHREAD */ | ||||
| struct workqueue_struct *rcu_par_gp_wq; | ||||
| 
 | ||||
| static void __init rcu_start_exp_gp_kworkers(void) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| static inline void rcu_alloc_par_gp_wq(void) | ||||
| { | ||||
| 	rcu_par_gp_wq = alloc_workqueue("rcu_par_gp", WQ_MEM_RECLAIM, 0); | ||||
| 	WARN_ON(!rcu_par_gp_wq); | ||||
| } | ||||
| #endif /* CONFIG_RCU_EXP_KTHREAD */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Spawn the kthreads that handle RCU's grace periods. | ||||
|  */ | ||||
|  | @ -4809,10 +4885,10 @@ static int __init rcu_spawn_gp_kthread(void) | |||
| 	 * due to rcu_scheduler_fully_active. | ||||
| 	 */ | ||||
| 	rcu_spawn_cpu_nocb_kthread(smp_processor_id()); | ||||
| 	rcu_spawn_one_boost_kthread(rdp->mynode); | ||||
| 	rcu_spawn_rnp_kthreads(rdp->mynode); | ||||
| 	rcu_spawn_core_kthreads(); | ||||
| 	/* Create kthread worker for expedited GPs */ | ||||
| 	rcu_start_exp_gp_kworkers(); | ||||
| 	rcu_start_exp_gp_kworker(); | ||||
| 	return 0; | ||||
| } | ||||
| early_initcall(rcu_spawn_gp_kthread); | ||||
|  | @ -4915,7 +4991,7 @@ static void __init rcu_init_one(void) | |||
| 			init_waitqueue_head(&rnp->exp_wq[2]); | ||||
| 			init_waitqueue_head(&rnp->exp_wq[3]); | ||||
| 			spin_lock_init(&rnp->exp_lock); | ||||
| 			mutex_init(&rnp->boost_kthread_mutex); | ||||
| 			mutex_init(&rnp->kthread_mutex); | ||||
| 			raw_spin_lock_init(&rnp->exp_poll_lock); | ||||
| 			rnp->exp_seq_poll_rq = RCU_GET_STATE_COMPLETED; | ||||
| 			INIT_WORK(&rnp->exp_poll_wq, sync_rcu_do_polled_gp); | ||||
|  | @ -5152,7 +5228,6 @@ void __init rcu_init(void) | |||
| 	/* Create workqueue for Tree SRCU and for expedited GPs. */ | ||||
| 	rcu_gp_wq = alloc_workqueue("rcu_gp", WQ_MEM_RECLAIM, 0); | ||||
| 	WARN_ON(!rcu_gp_wq); | ||||
| 	rcu_alloc_par_gp_wq(); | ||||
| 
 | ||||
| 	/* Fill in default value for rcutree.qovld boot parameter. */ | ||||
| 	/* -After- the rcu_node ->lock fields are initialized! */ | ||||
|  | @ -5165,6 +5240,8 @@ void __init rcu_init(void) | |||
| 	(void)start_poll_synchronize_rcu_expedited(); | ||||
| 
 | ||||
| 	rcu_test_sync_prims(); | ||||
| 
 | ||||
| 	tasks_cblist_init_generic(); | ||||
| } | ||||
| 
 | ||||
| #include "tree_stall.h" | ||||
|  |  | |||
|  | @ -21,14 +21,10 @@ | |||
| 
 | ||||
| #include "rcu_segcblist.h" | ||||
| 
 | ||||
| /* Communicate arguments to a workqueue handler. */ | ||||
| /* Communicate arguments to a kthread worker handler. */ | ||||
| struct rcu_exp_work { | ||||
| 	unsigned long rew_s; | ||||
| #ifdef CONFIG_RCU_EXP_KTHREAD | ||||
| 	struct kthread_work rew_work; | ||||
| #else | ||||
| 	struct work_struct rew_work; | ||||
| #endif /* CONFIG_RCU_EXP_KTHREAD */ | ||||
| }; | ||||
| 
 | ||||
| /* RCU's kthread states for tracing. */ | ||||
|  | @ -72,6 +68,9 @@ struct rcu_node { | |||
| 				/* Online CPUs for next expedited GP. */ | ||||
| 				/*  Any CPU that has ever been online will */ | ||||
| 				/*  have its bit set. */ | ||||
| 	struct kthread_worker *exp_kworker; | ||||
| 				/* Workers performing per node expedited GP */ | ||||
| 				/* initialization. */ | ||||
| 	unsigned long cbovldmask; | ||||
| 				/* CPUs experiencing callback overload. */ | ||||
| 	unsigned long ffmask;	/* Fully functional CPUs. */ | ||||
|  | @ -113,7 +112,7 @@ struct rcu_node { | |||
| 				/*  side effect, not as a lock. */ | ||||
| 	unsigned long boost_time; | ||||
| 				/* When to start boosting (jiffies). */ | ||||
| 	struct mutex boost_kthread_mutex; | ||||
| 	struct mutex kthread_mutex; | ||||
| 				/* Exclusion for thread spawning and affinity */ | ||||
| 				/*  manipulation. */ | ||||
| 	struct task_struct *boost_kthread_task; | ||||
|  | @ -467,11 +466,10 @@ static void rcu_init_one_nocb(struct rcu_node *rnp); | |||
| static bool wake_nocb_gp(struct rcu_data *rdp, bool force); | ||||
| static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, | ||||
| 				  unsigned long j, bool lazy); | ||||
| static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, | ||||
| 				bool *was_alldone, unsigned long flags, | ||||
| 				bool lazy); | ||||
| static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, | ||||
| 				 unsigned long flags); | ||||
| static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, | ||||
| 			  rcu_callback_t func, unsigned long flags, bool lazy); | ||||
| static void __maybe_unused __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, | ||||
| 						unsigned long flags); | ||||
| static int rcu_nocb_need_deferred_wakeup(struct rcu_data *rdp, int level); | ||||
| static bool do_nocb_deferred_wakeup(struct rcu_data *rdp); | ||||
| static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp); | ||||
|  |  | |||
|  | @ -198,10 +198,9 @@ static void __rcu_report_exp_rnp(struct rcu_node *rnp, | |||
| 		} | ||||
| 		if (rnp->parent == NULL) { | ||||
| 			raw_spin_unlock_irqrestore_rcu_node(rnp, flags); | ||||
| 			if (wake) { | ||||
| 				smp_mb(); /* EGP done before wake_up(). */ | ||||
| 			if (wake) | ||||
| 				swake_up_one_online(&rcu_state.expedited_wq); | ||||
| 			} | ||||
| 
 | ||||
| 			break; | ||||
| 		} | ||||
| 		mask = rnp->grpmask; | ||||
|  | @ -419,7 +418,6 @@ retry_ipi: | |||
| 
 | ||||
| static void rcu_exp_sel_wait_wake(unsigned long s); | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_EXP_KTHREAD | ||||
| static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) | ||||
| { | ||||
| 	struct rcu_exp_work *rewp = | ||||
|  | @ -428,9 +426,14 @@ static void sync_rcu_exp_select_node_cpus(struct kthread_work *wp) | |||
| 	__sync_rcu_exp_select_node_cpus(rewp); | ||||
| } | ||||
| 
 | ||||
| static inline bool rcu_gp_par_worker_started(void) | ||||
| static inline bool rcu_exp_worker_started(void) | ||||
| { | ||||
| 	return !!READ_ONCE(rcu_exp_par_gp_kworker); | ||||
| 	return !!READ_ONCE(rcu_exp_gp_kworker); | ||||
| } | ||||
| 
 | ||||
| static inline bool rcu_exp_par_worker_started(struct rcu_node *rnp) | ||||
| { | ||||
| 	return !!READ_ONCE(rnp->exp_kworker); | ||||
| } | ||||
| 
 | ||||
| static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) | ||||
|  | @ -441,7 +444,7 @@ static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) | |||
| 	 * another work item on the same kthread worker can result in | ||||
| 	 * deadlock. | ||||
| 	 */ | ||||
| 	kthread_queue_work(rcu_exp_par_gp_kworker, &rnp->rew.rew_work); | ||||
| 	kthread_queue_work(READ_ONCE(rnp->exp_kworker), &rnp->rew.rew_work); | ||||
| } | ||||
| 
 | ||||
| static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp) | ||||
|  | @ -466,64 +469,6 @@ static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew | |||
| 	kthread_queue_work(rcu_exp_gp_kworker, &rew->rew_work); | ||||
| } | ||||
| 
 | ||||
| static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew) | ||||
| { | ||||
| } | ||||
| #else /* !CONFIG_RCU_EXP_KTHREAD */ | ||||
| static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) | ||||
| { | ||||
| 	struct rcu_exp_work *rewp = | ||||
| 		container_of(wp, struct rcu_exp_work, rew_work); | ||||
| 
 | ||||
| 	__sync_rcu_exp_select_node_cpus(rewp); | ||||
| } | ||||
| 
 | ||||
| static inline bool rcu_gp_par_worker_started(void) | ||||
| { | ||||
| 	return !!READ_ONCE(rcu_par_gp_wq); | ||||
| } | ||||
| 
 | ||||
| static inline void sync_rcu_exp_select_cpus_queue_work(struct rcu_node *rnp) | ||||
| { | ||||
| 	int cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1); | ||||
| 
 | ||||
| 	INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); | ||||
| 	/* If all offline, queue the work on an unbound CPU. */ | ||||
| 	if (unlikely(cpu > rnp->grphi - rnp->grplo)) | ||||
| 		cpu = WORK_CPU_UNBOUND; | ||||
| 	else | ||||
| 		cpu += rnp->grplo; | ||||
| 	queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work); | ||||
| } | ||||
| 
 | ||||
| static inline void sync_rcu_exp_select_cpus_flush_work(struct rcu_node *rnp) | ||||
| { | ||||
| 	flush_work(&rnp->rew.rew_work); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Work-queue handler to drive an expedited grace period forward. | ||||
|  */ | ||||
| static void wait_rcu_exp_gp(struct work_struct *wp) | ||||
| { | ||||
| 	struct rcu_exp_work *rewp; | ||||
| 
 | ||||
| 	rewp = container_of(wp, struct rcu_exp_work, rew_work); | ||||
| 	rcu_exp_sel_wait_wake(rewp->rew_s); | ||||
| } | ||||
| 
 | ||||
| static inline void synchronize_rcu_expedited_queue_work(struct rcu_exp_work *rew) | ||||
| { | ||||
| 	INIT_WORK_ONSTACK(&rew->rew_work, wait_rcu_exp_gp); | ||||
| 	queue_work(rcu_gp_wq, &rew->rew_work); | ||||
| } | ||||
| 
 | ||||
| static inline void synchronize_rcu_expedited_destroy_work(struct rcu_exp_work *rew) | ||||
| { | ||||
| 	destroy_work_on_stack(&rew->rew_work); | ||||
| } | ||||
| #endif /* CONFIG_RCU_EXP_KTHREAD */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Select the nodes that the upcoming expedited grace period needs | ||||
|  * to wait for. | ||||
|  | @ -541,7 +486,7 @@ static void sync_rcu_exp_select_cpus(void) | |||
| 		rnp->exp_need_flush = false; | ||||
| 		if (!READ_ONCE(rnp->expmask)) | ||||
| 			continue; /* Avoid early boot non-existent wq. */ | ||||
| 		if (!rcu_gp_par_worker_started() || | ||||
| 		if (!rcu_exp_par_worker_started(rnp) || | ||||
| 		    rcu_scheduler_active != RCU_SCHEDULER_RUNNING || | ||||
| 		    rcu_is_last_leaf_node(rnp)) { | ||||
| 			/* No worker started yet or last leaf, do direct call. */ | ||||
|  | @ -956,7 +901,6 @@ static void rcu_exp_print_detail_task_stall_rnp(struct rcu_node *rnp) | |||
|  */ | ||||
| void synchronize_rcu_expedited(void) | ||||
| { | ||||
| 	bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT); | ||||
| 	unsigned long flags; | ||||
| 	struct rcu_exp_work rew; | ||||
| 	struct rcu_node *rnp; | ||||
|  | @ -996,7 +940,7 @@ void synchronize_rcu_expedited(void) | |||
| 		return;  /* Someone else did our work for us. */ | ||||
| 
 | ||||
| 	/* Ensure that load happens before action based on it. */ | ||||
| 	if (unlikely(boottime)) { | ||||
| 	if (unlikely((rcu_scheduler_active == RCU_SCHEDULER_INIT) || !rcu_exp_worker_started())) { | ||||
| 		/* Direct call during scheduler init and early_initcalls(). */ | ||||
| 		rcu_exp_sel_wait_wake(s); | ||||
| 	} else { | ||||
|  | @ -1013,9 +957,6 @@ void synchronize_rcu_expedited(void) | |||
| 
 | ||||
| 	/* Let the next expedited grace period start. */ | ||||
| 	mutex_unlock(&rcu_state.exp_mutex); | ||||
| 
 | ||||
| 	if (likely(!boottime)) | ||||
| 		synchronize_rcu_expedited_destroy_work(&rew); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | ||||
| 
 | ||||
|  |  | |||
|  | @ -256,6 +256,7 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) | |||
| 	return __wake_nocb_gp(rdp_gp, rdp, force, flags); | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_LAZY | ||||
| /*
 | ||||
|  * LAZY_FLUSH_JIFFIES decides the maximum amount of time that | ||||
|  * can elapse before lazy callbacks are flushed. Lazy callbacks | ||||
|  | @ -264,21 +265,20 @@ static bool wake_nocb_gp(struct rcu_data *rdp, bool force) | |||
|  * left unsubmitted to RCU after those many jiffies. | ||||
|  */ | ||||
| #define LAZY_FLUSH_JIFFIES (10 * HZ) | ||||
| static unsigned long jiffies_till_flush = LAZY_FLUSH_JIFFIES; | ||||
| static unsigned long jiffies_lazy_flush = LAZY_FLUSH_JIFFIES; | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_LAZY | ||||
| // To be called only from test code.
 | ||||
| void rcu_lazy_set_jiffies_till_flush(unsigned long jif) | ||||
| void rcu_set_jiffies_lazy_flush(unsigned long jif) | ||||
| { | ||||
| 	jiffies_till_flush = jif; | ||||
| 	jiffies_lazy_flush = jif; | ||||
| } | ||||
| EXPORT_SYMBOL(rcu_lazy_set_jiffies_till_flush); | ||||
| EXPORT_SYMBOL(rcu_set_jiffies_lazy_flush); | ||||
| 
 | ||||
| unsigned long rcu_lazy_get_jiffies_till_flush(void) | ||||
| unsigned long rcu_get_jiffies_lazy_flush(void) | ||||
| { | ||||
| 	return jiffies_till_flush; | ||||
| 	return jiffies_lazy_flush; | ||||
| } | ||||
| EXPORT_SYMBOL(rcu_lazy_get_jiffies_till_flush); | ||||
| EXPORT_SYMBOL(rcu_get_jiffies_lazy_flush); | ||||
| #endif | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -299,7 +299,7 @@ static void wake_nocb_gp_defer(struct rcu_data *rdp, int waketype, | |||
| 	 */ | ||||
| 	if (waketype == RCU_NOCB_WAKE_LAZY && | ||||
| 	    rdp->nocb_defer_wakeup == RCU_NOCB_WAKE_NOT) { | ||||
| 		mod_timer(&rdp_gp->nocb_timer, jiffies + jiffies_till_flush); | ||||
| 		mod_timer(&rdp_gp->nocb_timer, jiffies + rcu_get_jiffies_lazy_flush()); | ||||
| 		WRITE_ONCE(rdp_gp->nocb_defer_wakeup, waketype); | ||||
| 	} else if (waketype == RCU_NOCB_WAKE_BYPASS) { | ||||
| 		mod_timer(&rdp_gp->nocb_timer, jiffies + 2); | ||||
|  | @ -482,7 +482,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, | |||
| 	// flush ->nocb_bypass to ->cblist.
 | ||||
| 	if ((ncbs && !bypass_is_lazy && j != READ_ONCE(rdp->nocb_bypass_first)) || | ||||
| 	    (ncbs &&  bypass_is_lazy && | ||||
| 	     (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush))) || | ||||
| 	     (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()))) || | ||||
| 	    ncbs >= qhimark) { | ||||
| 		rcu_nocb_lock(rdp); | ||||
| 		*was_alldone = !rcu_segcblist_pend_cbs(&rdp->cblist); | ||||
|  | @ -532,9 +532,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, | |||
| 	// 2. Both of these conditions are met:
 | ||||
| 	//    a. The bypass list previously had only lazy CBs, and:
 | ||||
| 	//    b. The new CB is non-lazy.
 | ||||
| 	if (ncbs && (!bypass_is_lazy || lazy)) { | ||||
| 		local_irq_restore(flags); | ||||
| 	} else { | ||||
| 	if (!ncbs || (bypass_is_lazy && !lazy)) { | ||||
| 		// No-CBs GP kthread might be indefinitely asleep, if so, wake.
 | ||||
| 		rcu_nocb_lock(rdp); // Rare during call_rcu() flood.
 | ||||
| 		if (!rcu_segcblist_pend_cbs(&rdp->cblist)) { | ||||
|  | @ -544,7 +542,7 @@ static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, | |||
| 		} else { | ||||
| 			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, | ||||
| 					    TPS("FirstBQnoWake")); | ||||
| 			rcu_nocb_unlock_irqrestore(rdp, flags); | ||||
| 			rcu_nocb_unlock(rdp); | ||||
| 		} | ||||
| 	} | ||||
| 	return true; // Callback already enqueued.
 | ||||
|  | @ -566,11 +564,12 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, | |||
| 	long lazy_len; | ||||
| 	long len; | ||||
| 	struct task_struct *t; | ||||
| 	struct rcu_data *rdp_gp = rdp->nocb_gp_rdp; | ||||
| 
 | ||||
| 	// If we are being polled or there is no kthread, just leave.
 | ||||
| 	t = READ_ONCE(rdp->nocb_gp_kthread); | ||||
| 	if (rcu_nocb_poll || !t) { | ||||
| 		rcu_nocb_unlock_irqrestore(rdp, flags); | ||||
| 		rcu_nocb_unlock(rdp); | ||||
| 		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, | ||||
| 				    TPS("WakeNotPoll")); | ||||
| 		return; | ||||
|  | @ -583,17 +582,17 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, | |||
| 		rdp->qlen_last_fqs_check = len; | ||||
| 		// Only lazy CBs in bypass list
 | ||||
| 		if (lazy_len && bypass_len == lazy_len) { | ||||
| 			rcu_nocb_unlock_irqrestore(rdp, flags); | ||||
| 			rcu_nocb_unlock(rdp); | ||||
| 			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_LAZY, | ||||
| 					   TPS("WakeLazy")); | ||||
| 		} else if (!irqs_disabled_flags(flags)) { | ||||
| 			/* ... if queue was empty ... */ | ||||
| 			rcu_nocb_unlock_irqrestore(rdp, flags); | ||||
| 			rcu_nocb_unlock(rdp); | ||||
| 			wake_nocb_gp(rdp, false); | ||||
| 			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, | ||||
| 					    TPS("WakeEmpty")); | ||||
| 		} else { | ||||
| 			rcu_nocb_unlock_irqrestore(rdp, flags); | ||||
| 			rcu_nocb_unlock(rdp); | ||||
| 			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE, | ||||
| 					   TPS("WakeEmptyIsDeferred")); | ||||
| 		} | ||||
|  | @ -610,20 +609,32 @@ static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_alldone, | |||
| 		smp_mb(); /* Enqueue before timer_pending(). */ | ||||
| 		if ((rdp->nocb_cb_sleep || | ||||
| 		     !rcu_segcblist_ready_cbs(&rdp->cblist)) && | ||||
| 		    !timer_pending(&rdp->nocb_timer)) { | ||||
| 			rcu_nocb_unlock_irqrestore(rdp, flags); | ||||
| 		    !timer_pending(&rdp_gp->nocb_timer)) { | ||||
| 			rcu_nocb_unlock(rdp); | ||||
| 			wake_nocb_gp_defer(rdp, RCU_NOCB_WAKE_FORCE, | ||||
| 					   TPS("WakeOvfIsDeferred")); | ||||
| 		} else { | ||||
| 			rcu_nocb_unlock_irqrestore(rdp, flags); | ||||
| 			rcu_nocb_unlock(rdp); | ||||
| 			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot")); | ||||
| 		} | ||||
| 	} else { | ||||
| 		rcu_nocb_unlock_irqrestore(rdp, flags); | ||||
| 		rcu_nocb_unlock(rdp); | ||||
| 		trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WakeNot")); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, | ||||
| 			  rcu_callback_t func, unsigned long flags, bool lazy) | ||||
| { | ||||
| 	bool was_alldone; | ||||
| 
 | ||||
| 	if (!rcu_nocb_try_bypass(rdp, head, &was_alldone, flags, lazy)) { | ||||
| 		/* Not enqueued on bypass but locked, do regular enqueue */ | ||||
| 		rcutree_enqueue(rdp, head, func); | ||||
| 		__call_rcu_nocb_wake(rdp, was_alldone, flags); /* unlocks */ | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| static int nocb_gp_toggle_rdp(struct rcu_data *rdp, | ||||
| 			       bool *wake_state) | ||||
| { | ||||
|  | @ -723,7 +734,7 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) | |||
| 		lazy_ncbs = READ_ONCE(rdp->lazy_len); | ||||
| 
 | ||||
| 		if (bypass_ncbs && (lazy_ncbs == bypass_ncbs) && | ||||
| 		    (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + jiffies_till_flush) || | ||||
| 		    (time_after(j, READ_ONCE(rdp->nocb_bypass_first) + rcu_get_jiffies_lazy_flush()) || | ||||
| 		     bypass_ncbs > 2 * qhimark)) { | ||||
| 			flush_bypass = true; | ||||
| 		} else if (bypass_ncbs && (lazy_ncbs != bypass_ncbs) && | ||||
|  | @ -779,7 +790,6 @@ static void nocb_gp_wait(struct rcu_data *my_rdp) | |||
| 		if (rcu_segcblist_ready_cbs(&rdp->cblist)) { | ||||
| 			needwake = rdp->nocb_cb_sleep; | ||||
| 			WRITE_ONCE(rdp->nocb_cb_sleep, false); | ||||
| 			smp_mb(); /* CB invocation -after- GP end. */ | ||||
| 		} else { | ||||
| 			needwake = false; | ||||
| 		} | ||||
|  | @ -933,8 +943,7 @@ static void nocb_cb_wait(struct rcu_data *rdp) | |||
| 		swait_event_interruptible_exclusive(rdp->nocb_cb_wq, | ||||
| 						    nocb_cb_wait_cond(rdp)); | ||||
| 
 | ||||
| 		// VVV Ensure CB invocation follows _sleep test.
 | ||||
| 		if (smp_load_acquire(&rdp->nocb_cb_sleep)) { // ^^^
 | ||||
| 		if (READ_ONCE(rdp->nocb_cb_sleep)) { | ||||
| 			WARN_ON(signal_pending(current)); | ||||
| 			trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeEmpty")); | ||||
| 		} | ||||
|  | @ -1383,7 +1392,7 @@ lazy_rcu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | |||
| 			rcu_nocb_unlock_irqrestore(rdp, flags); | ||||
| 			continue; | ||||
| 		} | ||||
| 		WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies, false)); | ||||
| 		rcu_nocb_try_flush_bypass(rdp, jiffies); | ||||
| 		rcu_nocb_unlock_irqrestore(rdp, flags); | ||||
| 		wake_nocb_gp(rdp, false); | ||||
| 		sc->nr_to_scan -= _count; | ||||
|  | @ -1768,10 +1777,10 @@ static bool rcu_nocb_flush_bypass(struct rcu_data *rdp, struct rcu_head *rhp, | |||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| static bool rcu_nocb_try_bypass(struct rcu_data *rdp, struct rcu_head *rhp, | ||||
| 				bool *was_alldone, unsigned long flags, bool lazy) | ||||
| static void call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *head, | ||||
| 			  rcu_callback_t func, unsigned long flags, bool lazy) | ||||
| { | ||||
| 	return false; | ||||
| 	WARN_ON_ONCE(1);  /* Should be dead code! */ | ||||
| } | ||||
| 
 | ||||
| static void __call_rcu_nocb_wake(struct rcu_data *rdp, bool was_empty, | ||||
|  |  | |||
|  | @ -1195,14 +1195,13 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) | |||
| 	struct sched_param sp; | ||||
| 	struct task_struct *t; | ||||
| 
 | ||||
| 	mutex_lock(&rnp->boost_kthread_mutex); | ||||
| 	if (rnp->boost_kthread_task || !rcu_scheduler_fully_active) | ||||
| 		goto out; | ||||
| 	if (rnp->boost_kthread_task) | ||||
| 		return; | ||||
| 
 | ||||
| 	t = kthread_create(rcu_boost_kthread, (void *)rnp, | ||||
| 			   "rcub/%d", rnp_index); | ||||
| 	if (WARN_ON_ONCE(IS_ERR(t))) | ||||
| 		goto out; | ||||
| 		return; | ||||
| 
 | ||||
| 	raw_spin_lock_irqsave_rcu_node(rnp, flags); | ||||
| 	rnp->boost_kthread_task = t; | ||||
|  | @ -1210,48 +1209,11 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) | |||
| 	sp.sched_priority = kthread_prio; | ||||
| 	sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); | ||||
| 	wake_up_process(t); /* get to TASK_INTERRUPTIBLE quickly. */ | ||||
| 
 | ||||
|  out: | ||||
| 	mutex_unlock(&rnp->boost_kthread_mutex); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Set the per-rcu_node kthread's affinity to cover all CPUs that are | ||||
|  * served by the rcu_node in question.  The CPU hotplug lock is still | ||||
|  * held, so the value of rnp->qsmaskinit will be stable. | ||||
|  * | ||||
|  * We don't include outgoingcpu in the affinity set, use -1 if there is | ||||
|  * no outgoing CPU.  If there are no CPUs left in the affinity set, | ||||
|  * this function allows the kthread to execute on any CPU. | ||||
|  * | ||||
|  * Any future concurrent calls are serialized via ->boost_kthread_mutex. | ||||
|  */ | ||||
| static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | ||||
| static struct task_struct *rcu_boost_task(struct rcu_node *rnp) | ||||
| { | ||||
| 	struct task_struct *t = rnp->boost_kthread_task; | ||||
| 	unsigned long mask; | ||||
| 	cpumask_var_t cm; | ||||
| 	int cpu; | ||||
| 
 | ||||
| 	if (!t) | ||||
| 		return; | ||||
| 	if (!zalloc_cpumask_var(&cm, GFP_KERNEL)) | ||||
| 		return; | ||||
| 	mutex_lock(&rnp->boost_kthread_mutex); | ||||
| 	mask = rcu_rnp_online_cpus(rnp); | ||||
| 	for_each_leaf_node_possible_cpu(rnp, cpu) | ||||
| 		if ((mask & leaf_node_cpu_bit(rnp, cpu)) && | ||||
| 		    cpu != outgoingcpu) | ||||
| 			cpumask_set_cpu(cpu, cm); | ||||
| 	cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU)); | ||||
| 	if (cpumask_empty(cm)) { | ||||
| 		cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU)); | ||||
| 		if (outgoingcpu >= 0) | ||||
| 			cpumask_clear_cpu(outgoingcpu, cm); | ||||
| 	} | ||||
| 	set_cpus_allowed_ptr(t, cm); | ||||
| 	mutex_unlock(&rnp->boost_kthread_mutex); | ||||
| 	free_cpumask_var(cm); | ||||
| 	return READ_ONCE(rnp->boost_kthread_task); | ||||
| } | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_RCU_BOOST */ | ||||
|  | @ -1270,10 +1232,10 @@ static void rcu_spawn_one_boost_kthread(struct rcu_node *rnp) | |||
| { | ||||
| } | ||||
| 
 | ||||
| static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) | ||||
| static struct task_struct *rcu_boost_task(struct rcu_node *rnp) | ||||
| { | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_RCU_BOOST */ | ||||
| 
 | ||||
| /*
 | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 Linus Torvalds
						Linus Torvalds