	Merge branch 'for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into core/rcu
Pull RCU updates from Paul E. McKenney:

 - Documentation updates.
 - Miscellaneous fixes.
 - Parallelize SRCU callback handling (plus overlapping patches).

Signed-off-by: Ingo Molnar <mingo@kernel.org>
commit 58d30c36d4
 71 changed files with 3637 additions and 1116 deletions
		|  | @ -17,7 +17,7 @@ rcu_dereference.txt | |||
| rcubarrier.txt | ||||
| 	- RCU and Unloadable Modules | ||||
| rculist_nulls.txt | ||||
| 	- RCU list primitives for use with SLAB_DESTROY_BY_RCU | ||||
| 	- RCU list primitives for use with SLAB_TYPESAFE_BY_RCU | ||||
| rcuref.txt | ||||
| 	- Reference-count design for elements of lists/arrays protected by RCU | ||||
| rcu.txt | ||||
|  |  | |||
|  | @ -19,6 +19,8 @@ to each other. | |||
| 	The <tt>rcu_state</tt> Structure</a> | ||||
| <li>	<a href="#The rcu_node Structure"> | ||||
| 	The <tt>rcu_node</tt> Structure</a> | ||||
| <li>	<a href="#The rcu_segcblist Structure"> | ||||
| 	The <tt>rcu_segcblist</tt> Structure</a> | ||||
| <li>	<a href="#The rcu_data Structure"> | ||||
| 	The <tt>rcu_data</tt> Structure</a> | ||||
| <li>	<a href="#The rcu_dynticks Structure"> | ||||
|  | @ -841,6 +843,134 @@ for lockdep lock-class names. | |||
| Finally, lines 64-66 produce an error if the maximum number of | ||||
| CPUs is too large for the specified fanout. | ||||
| 
 | ||||
| <h3><a name="The rcu_segcblist Structure"> | ||||
| The <tt>rcu_segcblist</tt> Structure</a></h3> | ||||
| 
 | ||||
| The <tt>rcu_segcblist</tt> structure maintains a segmented list of | ||||
| callbacks as follows: | ||||
| 
 | ||||
| <pre> | ||||
|  1 #define RCU_DONE_TAIL        0 | ||||
|  2 #define RCU_WAIT_TAIL        1 | ||||
|  3 #define RCU_NEXT_READY_TAIL  2 | ||||
|  4 #define RCU_NEXT_TAIL        3 | ||||
|  5 #define RCU_CBLIST_NSEGS     4 | ||||
|  6 | ||||
|  7 struct rcu_segcblist { | ||||
|  8   struct rcu_head *head; | ||||
|  9   struct rcu_head **tails[RCU_CBLIST_NSEGS]; | ||||
| 10   unsigned long gp_seq[RCU_CBLIST_NSEGS]; | ||||
| 11   long len; | ||||
| 12   long len_lazy; | ||||
| 13 }; | ||||
| </pre> | ||||
| 
 | ||||
| <p> | ||||
| The segments are as follows: | ||||
| 
 | ||||
| <ol> | ||||
| <li>	<tt>RCU_DONE_TAIL</tt>: Callbacks whose grace periods have elapsed. | ||||
| 	These callbacks are ready to be invoked. | ||||
| <li>	<tt>RCU_WAIT_TAIL</tt>: Callbacks that are waiting for the | ||||
| 	current grace period. | ||||
| 	Note that different CPUs can have different ideas about which | ||||
| 	grace period is current, hence the <tt>->gp_seq</tt> field. | ||||
| <li>	<tt>RCU_NEXT_READY_TAIL</tt>: Callbacks waiting for the next | ||||
| 	grace period to start. | ||||
| <li>	<tt>RCU_NEXT_TAIL</tt>: Callbacks that have not yet been | ||||
| 	associated with a grace period. | ||||
| </ol> | ||||
| 
 | ||||
| <p> | ||||
| The <tt>->head</tt> pointer references the first callback or | ||||
| is <tt>NULL</tt> if the list contains no callbacks (which is | ||||
| <i>not</i> the same as being empty). | ||||
| Each element of the <tt>->tails[]</tt> array references the | ||||
| <tt>->next</tt> pointer of the last callback in the corresponding | ||||
| segment of the list, or the list's <tt>->head</tt> pointer if | ||||
| that segment and all previous segments are empty. | ||||
| If the corresponding segment is empty but some previous segment is | ||||
| not empty, then the array element is identical to its predecessor. | ||||
| Older callbacks are closer to the head of the list, and new callbacks | ||||
| are added at the tail. | ||||
| This relationship between the <tt>->head</tt> pointer, the | ||||
| <tt>->tails[]</tt> array, and the callbacks is shown in this | ||||
| diagram: | ||||
| 
 | ||||
| </p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%"> | ||||
| 
 | ||||
| </p><p>In this figure, the <tt>->head</tt> pointer references the | ||||
| first | ||||
| RCU callback in the list. | ||||
| The <tt>->tails[RCU_DONE_TAIL]</tt> array element references | ||||
| the <tt>->head</tt> pointer itself, indicating that none | ||||
| of the callbacks is ready to invoke. | ||||
| The <tt>->tails[RCU_WAIT_TAIL]</tt> array element references callback | ||||
| CB 2's <tt>->next</tt> pointer, which indicates that | ||||
| CB 1 and CB 2 are both waiting on the current grace period, | ||||
| give or take possible disagreements about exactly which grace period | ||||
| is the current one. | ||||
| The <tt>->tails[RCU_NEXT_READY_TAIL]</tt> array element | ||||
| references the same RCU callback that <tt>->tails[RCU_WAIT_TAIL]</tt> | ||||
| does, which indicates that there are no callbacks waiting on the next | ||||
| RCU grace period. | ||||
| The <tt>->tails[RCU_NEXT_TAIL]</tt> array element references | ||||
| CB 4's <tt>->next</tt> pointer, indicating that all the | ||||
| remaining RCU callbacks have not yet been assigned to an RCU grace | ||||
| period. | ||||
| Note that the <tt>->tails[RCU_NEXT_TAIL]</tt> array element | ||||
| always references the last RCU callback's <tt>->next</tt> pointer | ||||
| unless the callback list is empty, in which case it references | ||||
| the <tt>->head</tt> pointer. | ||||
| 
 | ||||
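<p>
As a deliberately simplified sketch (not the kernel's actual enqueue
code) of this tail-pointer bookkeeping, appending a new callback links
it in at the location referenced by <tt>->tails[RCU_NEXT_TAIL]</tt>
and then advances that tail pointer:

<pre>
 1 /* Simplified sketch: append a new callback to the RCU_NEXT_TAIL segment. */
 2 static void segcblist_enqueue_sketch(struct rcu_segcblist *rsclp,
 3                                      struct rcu_head *rhp, bool lazy)
 4 {
 5   rsclp->len++;
 6   if (lazy)
 7     rsclp->len_lazy++;
 8   rhp->next = NULL;
 9   *rsclp->tails[RCU_NEXT_TAIL] = rhp;       /* Old end of list now points to rhp. */
10   rsclp->tails[RCU_NEXT_TAIL] = &rhp->next; /* rhp's ->next is the new end. */
11 }
</pre>
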
| <p> | ||||
| There is one additional important special case for the | ||||
| <tt>->tails[RCU_NEXT_TAIL]</tt> array element: It can be <tt>NULL</tt> | ||||
| when this list is <i>disabled</i>. | ||||
| Lists are disabled when the corresponding CPU is offline or when | ||||
| the corresponding CPU's callbacks are offloaded to a kthread, | ||||
| both of which are described elsewhere. | ||||
| 
 | ||||
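<p>
In code, this special case can be tested as in the following sketch,
in which a non-<tt>NULL</tt> <tt>->tails[RCU_NEXT_TAIL]</tt> pointer
indicates an enabled list:

<pre>
 1 /* Sketch: a segmented callback list is enabled iff RCU_NEXT_TAIL is non-NULL. */
 2 static bool segcblist_enabled_sketch(struct rcu_segcblist *rsclp)
 3 {
 4   return rsclp->tails[RCU_NEXT_TAIL] != NULL;
 5 }
</pre>
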
| </p><p>CPUs advance their callbacks from the | ||||
| <tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the | ||||
| <tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments | ||||
| as grace periods advance. | ||||
| 
 | ||||
| </p><p>The <tt>->gp_seq[]</tt> array records grace-period | ||||
| numbers corresponding to the list segments. | ||||
| This is what allows different CPUs to have different ideas as to | ||||
| which is the current grace period while still avoiding premature | ||||
| invocation of their callbacks. | ||||
| In particular, this allows CPUs that go idle for extended periods | ||||
| to determine which of their callbacks are ready to be invoked after | ||||
| reawakening. | ||||
| 
 | ||||
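<p>
Putting the two preceding paragraphs together, the following rough
sketch (which ignores the kernel's additional cleanup of the newly
emptied segments) shows how segments whose grace periods have completed
can be merged into the <tt>RCU_DONE_TAIL</tt> segment, using the
kernel's wraparound-safe <tt>ULONG_CMP_LT()</tt> comparison:

<pre>
 1 /* Sketch: advance callbacks whose grace period has completed. */
 2 static void segcblist_advance_sketch(struct rcu_segcblist *rsclp,
 3                                      unsigned long completed_gp_seq)
 4 {
 5   int i;
 6
 7   for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
 8     if (ULONG_CMP_LT(completed_gp_seq, rsclp->gp_seq[i]))
 9       break;  /* This segment is still waiting for its grace period. */
10     rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i];
11   }
12 }
</pre>
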
| </p><p>The <tt>->len</tt> counter contains the number of | ||||
| callbacks in <tt>->head</tt>, and the | ||||
| <tt>->len_lazy</tt> contains the number of those callbacks that | ||||
| are known to only free memory, and whose invocation can therefore | ||||
| be safely deferred. | ||||
| 
 | ||||
| <p><b>Important note</b>: It is the <tt>->len</tt> field that | ||||
| determines whether or not there are callbacks associated with | ||||
| this <tt>rcu_segcblist</tt> structure, <i>not</i> the <tt>->head</tt> | ||||
| pointer. | ||||
| The reason for this is that all the ready-to-invoke callbacks | ||||
| (that is, those in the <tt>RCU_DONE_TAIL</tt> segment) are extracted | ||||
| all at once at callback-invocation time. | ||||
| If callback invocation must be postponed, for example, because a | ||||
| high-priority process just woke up on this CPU, then the remaining | ||||
| callbacks are placed back on the <tt>RCU_DONE_TAIL</tt> segment. | ||||
| Either way, the <tt>->len</tt> and <tt>->len_lazy</tt> counts | ||||
| are adjusted after the corresponding callbacks have been invoked, and so | ||||
| again it is the <tt>->len</tt> count that accurately reflects whether | ||||
| or not there are callbacks associated with this <tt>rcu_segcblist</tt> | ||||
| structure. | ||||
| Of course, off-CPU sampling of the <tt>->len</tt> count requires | ||||
| the use of appropriate synchronization, for example, memory barriers. | ||||
| This synchronization can be a bit subtle, particularly in the case | ||||
| of <tt>rcu_barrier()</tt>. | ||||
| 
 | ||||
| <h3><a name="The rcu_data Structure"> | ||||
| The <tt>rcu_data</tt> Structure</a></h3> | ||||
| 
 | ||||
|  | @ -983,62 +1113,18 @@ choice. | |||
| as follows: | ||||
| 
 | ||||
| <pre> | ||||
|  1 struct rcu_head *nxtlist; | ||||
|  2 struct rcu_head **nxttail[RCU_NEXT_SIZE]; | ||||
|  3 unsigned long nxtcompleted[RCU_NEXT_SIZE]; | ||||
|  4 long qlen_lazy; | ||||
|  5 long qlen; | ||||
|  6 long qlen_last_fqs_check; | ||||
|  1 struct rcu_segcblist cblist; | ||||
|  2 long qlen_last_fqs_check; | ||||
|  3 unsigned long n_cbs_invoked; | ||||
|  4 unsigned long n_nocbs_invoked; | ||||
|  5 unsigned long n_cbs_orphaned; | ||||
|  6 unsigned long n_cbs_adopted; | ||||
|  7 unsigned long n_force_qs_snap; | ||||
|  8 unsigned long n_cbs_invoked; | ||||
|  9 unsigned long n_cbs_orphaned; | ||||
| 10 unsigned long n_cbs_adopted; | ||||
| 11 long blimit; | ||||
|  8 long blimit; | ||||
| </pre> | ||||
| 
 | ||||
| <p>The <tt>->nxtlist</tt> pointer and the | ||||
| <tt>->nxttail[]</tt> array form a four-segment list with | ||||
| older callbacks near the head and newer ones near the tail. | ||||
| Each segment contains callbacks with the corresponding relationship | ||||
| to the current grace period. | ||||
| The pointer out of the end of each of the four segments is referenced | ||||
| by the element of the <tt>->nxttail[]</tt> array indexed by | ||||
| <tt>RCU_DONE_TAIL</tt> (for callbacks handled by a prior grace period), | ||||
| <tt>RCU_WAIT_TAIL</tt> (for callbacks waiting on the current grace period), | ||||
| <tt>RCU_NEXT_READY_TAIL</tt> (for callbacks that will wait on the next | ||||
| grace period), and | ||||
| <tt>RCU_NEXT_TAIL</tt> (for callbacks that are not yet associated | ||||
| with a specific grace period) | ||||
| respectively, as shown in the following figure. | ||||
| 
 | ||||
| </p><p><img src="nxtlist.svg" alt="nxtlist.svg" width="40%"> | ||||
| 
 | ||||
| </p><p>In this figure, the <tt>->nxtlist</tt> pointer references the | ||||
| first | ||||
| RCU callback in the list. | ||||
| The <tt>->nxttail[RCU_DONE_TAIL]</tt> array element references | ||||
| the <tt>->nxtlist</tt> pointer itself, indicating that none | ||||
| of the callbacks is ready to invoke. | ||||
| The <tt>->nxttail[RCU_WAIT_TAIL]</tt> array element references callback | ||||
| CB 2's <tt>->next</tt> pointer, which indicates that | ||||
| CB 1 and CB 2 are both waiting on the current grace period. | ||||
| The <tt>->nxttail[RCU_NEXT_READY_TAIL]</tt> array element | ||||
| references the same RCU callback that <tt>->nxttail[RCU_WAIT_TAIL]</tt> | ||||
| does, which indicates that there are no callbacks waiting on the next | ||||
| RCU grace period. | ||||
| The <tt>->nxttail[RCU_NEXT_TAIL]</tt> array element references | ||||
| CB 4's <tt>->next</tt> pointer, indicating that all the | ||||
| remaining RCU callbacks have not yet been assigned to an RCU grace | ||||
| period. | ||||
| Note that the <tt>->nxttail[RCU_NEXT_TAIL]</tt> array element | ||||
| always references the last RCU callback's <tt>->next</tt> pointer | ||||
| unless the callback list is empty, in which case it references | ||||
| the <tt>->nxtlist</tt> pointer. | ||||
| 
 | ||||
| </p><p>CPUs advance their callbacks from the | ||||
| <tt>RCU_NEXT_TAIL</tt> to the <tt>RCU_NEXT_READY_TAIL</tt> to the | ||||
| <tt>RCU_WAIT_TAIL</tt> to the <tt>RCU_DONE_TAIL</tt> list segments | ||||
| as grace periods advance. | ||||
| <p>The <tt>->cblist</tt> structure is the segmented callback list | ||||
| described earlier. | ||||
| The CPU advances the callbacks in its <tt>rcu_data</tt> structure | ||||
| whenever it notices that another RCU grace period has completed. | ||||
| The CPU detects the completion of an RCU grace period by noticing | ||||
|  | @ -1049,16 +1135,7 @@ Recall that each <tt>rcu_node</tt> structure's | |||
| <tt>->completed</tt> field is updated at the end of each | ||||
| grace period. | ||||
| 
 | ||||
| </p><p>The <tt>->nxtcompleted[]</tt> array records grace-period | ||||
| numbers corresponding to the list segments. | ||||
| This allows CPUs that go idle for extended periods to determine | ||||
| which of their callbacks are ready to be invoked after reawakening. | ||||
| 
 | ||||
| </p><p>The <tt>->qlen</tt> counter contains the number of | ||||
| callbacks in <tt>->nxtlist</tt>, and the | ||||
| <tt>->qlen_lazy</tt> contains the number of those callbacks that | ||||
| are known to only free memory, and whose invocation can therefore | ||||
| be safely deferred. | ||||
| <p> | ||||
| The <tt>->qlen_last_fqs_check</tt> and | ||||
| <tt>->n_force_qs_snap</tt> coordinate the forcing of quiescent | ||||
| states from <tt>call_rcu()</tt> and friends when callback | ||||
|  | @ -1069,6 +1146,10 @@ lists grow excessively long. | |||
| fields count the number of callbacks invoked, | ||||
| sent to other CPUs when this CPU goes offline, | ||||
| and received from other CPUs when those other CPUs go offline. | ||||
| The <tt>->n_nocbs_invoked</tt> is used when the CPU's callbacks | ||||
| are offloaded to a kthread. | ||||
| 
 | ||||
| <p> | ||||
| Finally, the <tt>->blimit</tt> counter is the maximum number of | ||||
| RCU callbacks that may be invoked at a given time. | ||||
| 
 | ||||
|  | @ -1104,6 +1185,9 @@ Its fields are as follows: | |||
|   1   int dynticks_nesting; | ||||
|   2   int dynticks_nmi_nesting; | ||||
|   3   atomic_t dynticks; | ||||
|   4   bool rcu_need_heavy_qs; | ||||
|   5   unsigned long rcu_qs_ctr; | ||||
|   6   bool rcu_urgent_qs; | ||||
| </pre> | ||||
| 
 | ||||
| <p>The <tt>->dynticks_nesting</tt> field counts the | ||||
|  | @ -1117,11 +1201,32 @@ NMIs are counted by the <tt>->dynticks_nmi_nesting</tt> | |||
| field, except that NMIs that interrupt non-dyntick-idle execution | ||||
| are not counted. | ||||
| 
 | ||||
| </p><p>Finally, the <tt>->dynticks</tt> field counts the corresponding | ||||
| </p><p>The <tt>->dynticks</tt> field counts the corresponding | ||||
| CPU's transitions to and from dyntick-idle mode, so that this counter | ||||
| has an even value when the CPU is in dyntick-idle mode and an odd | ||||
| value otherwise. | ||||
| 
 | ||||
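<p>
For example, in the following sketch (with <tt>rdtp</tt> assumed to
reference the CPU's <tt>rcu_dynticks</tt> structure), RCU can take a
fully ordered snapshot of this counter and test its low-order bit to
determine whether that CPU is currently in dyntick-idle mode:

<pre>
 1 int snap = atomic_add_return(0, &rdtp->dynticks); /* Ordered snapshot. */
 2 bool cpu_is_dyntick_idle = !(snap & 0x1);         /* Even value means idle. */
</pre>
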
| </p><p>The <tt>->rcu_need_heavy_qs</tt> field is used | ||||
| to record the fact that the RCU core code would really like to | ||||
| see a quiescent state from the corresponding CPU, so much so that | ||||
| it is willing to call for heavy-weight dyntick-counter operations. | ||||
| This flag is checked by RCU's context-switch and <tt>cond_resched()</tt> | ||||
| code, which provides a momentary idle sojourn in response. | ||||
| 
 | ||||
| </p><p>The <tt>->rcu_qs_ctr</tt> field is used to record | ||||
| quiescent states from <tt>cond_resched()</tt>. | ||||
| Because <tt>cond_resched()</tt> can execute quite frequently, this | ||||
| must be quite lightweight, as in a non-atomic increment of this | ||||
| per-CPU field. | ||||
| 
 | ||||
| </p><p>Finally, the <tt>->rcu_urgent_qs</tt> field is used to record | ||||
| the fact that the RCU core code would really like to see a quiescent | ||||
| state from the corresponding CPU, with the various other fields indicating | ||||
| just how badly RCU wants this quiescent state. | ||||
| This flag is checked by RCU's context-switch and <tt>cond_resched()</tt> | ||||
| code, which, if nothing else, non-atomically increments <tt>->rcu_qs_ctr</tt> | ||||
| in response. | ||||
| 
 | ||||
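<p>
The following sketch approximates the shape of that check (the exact
kernel code differs in detail, and the sketch assumes that the
<tt>rcu_dynticks</tt> structure is a per-CPU variable of that name):
a cheap read of <tt>->rcu_urgent_qs</tt> gates the work, and the
quiescent state itself is reported with a non-atomic per-CPU increment
of <tt>->rcu_qs_ctr</tt>:

<pre>
 1 if (unlikely(raw_cpu_read(rcu_dynticks.rcu_urgent_qs))) {
 2   this_cpu_write(rcu_dynticks.rcu_urgent_qs, false);
 3   this_cpu_inc(rcu_dynticks.rcu_qs_ctr); /* Lightweight quiescent-state report. */
 4 }
</pre>
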
| <table> | ||||
| <tr><th> </th></tr> | ||||
| <tr><th align="left">Quick Quiz:</th></tr> | ||||
|  |  | |||
|  | @ -19,7 +19,7 @@ | |||
|    id="svg2" | ||||
|    version="1.1" | ||||
|    inkscape:version="0.48.4 r9939" | ||||
|    sodipodi:docname="nxtlist.fig"> | ||||
|    sodipodi:docname="segcblist.svg"> | ||||
|   <metadata | ||||
|      id="metadata94"> | ||||
|     <rdf:RDF> | ||||
|  | @ -28,7 +28,7 @@ | |||
|         <dc:format>image/svg+xml</dc:format> | ||||
|         <dc:type | ||||
|            rdf:resource="http://purl.org/dc/dcmitype/StillImage" /> | ||||
|         <dc:title></dc:title> | ||||
|         <dc:title /> | ||||
|       </cc:Work> | ||||
|     </rdf:RDF> | ||||
|   </metadata> | ||||
|  | @ -241,61 +241,51 @@ | |||
|        xml:space="preserve" | ||||
|        x="225" | ||||
|        y="675" | ||||
|        fill="#000000" | ||||
|        font-family="Courier" | ||||
|        font-style="normal" | ||||
|        font-weight="bold" | ||||
|        font-size="324" | ||||
|        text-anchor="start" | ||||
|        id="text64">nxtlist</text> | ||||
|        id="text64" | ||||
|        style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">->head</text> | ||||
|     <!-- Text --> | ||||
|     <text | ||||
|        xml:space="preserve" | ||||
|        x="225" | ||||
|        y="1800" | ||||
|        fill="#000000" | ||||
|        font-family="Courier" | ||||
|        font-style="normal" | ||||
|        font-weight="bold" | ||||
|        font-size="324" | ||||
|        text-anchor="start" | ||||
|        id="text66">nxttail[RCU_DONE_TAIL]</text> | ||||
|        id="text66" | ||||
|        style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">->tails[RCU_DONE_TAIL]</text> | ||||
|     <!-- Text --> | ||||
|     <text | ||||
|        xml:space="preserve" | ||||
|        x="225" | ||||
|        y="2925" | ||||
|        fill="#000000" | ||||
|        font-family="Courier" | ||||
|        font-style="normal" | ||||
|        font-weight="bold" | ||||
|        font-size="324" | ||||
|        text-anchor="start" | ||||
|        id="text68">nxttail[RCU_WAIT_TAIL]</text> | ||||
|        id="text68" | ||||
|        style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">->tails[RCU_WAIT_TAIL]</text> | ||||
|     <!-- Text --> | ||||
|     <text | ||||
|        xml:space="preserve" | ||||
|        x="225" | ||||
|        y="4050" | ||||
|        fill="#000000" | ||||
|        font-family="Courier" | ||||
|        font-style="normal" | ||||
|        font-weight="bold" | ||||
|        font-size="324" | ||||
|        text-anchor="start" | ||||
|        id="text70">nxttail[RCU_NEXT_READY_TAIL]</text> | ||||
|        id="text70" | ||||
|        style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">->tails[RCU_NEXT_READY_TAIL]</text> | ||||
|     <!-- Text --> | ||||
|     <text | ||||
|        xml:space="preserve" | ||||
|        x="225" | ||||
|        y="5175" | ||||
|        fill="#000000" | ||||
|        font-family="Courier" | ||||
|        font-style="normal" | ||||
|        font-weight="bold" | ||||
|        font-size="324" | ||||
|        text-anchor="start" | ||||
|        id="text72">nxttail[RCU_NEXT_TAIL]</text> | ||||
|        id="text72" | ||||
|        style="font-size:324px;font-style:normal;font-weight:bold;text-anchor:start;fill:#000000;font-family:Courier">->tails[RCU_NEXT_TAIL]</text> | ||||
|     <!-- Text --> | ||||
|     <text | ||||
|        xml:space="preserve" | ||||
|  |  | |||
|  | @ -284,6 +284,7 @@ Expedited Grace Period Refinements</a></h2> | |||
| 	Funnel locking and wait/wakeup</a>. | ||||
| <li>	<a href="#Use of Workqueues">Use of Workqueues</a>. | ||||
| <li>	<a href="#Stall Warnings">Stall warnings</a>. | ||||
| <li>	<a href="#Mid-Boot Operation">Mid-boot operation</a>. | ||||
| </ol> | ||||
| 
 | ||||
| <h3><a name="Idle-CPU Checks">Idle-CPU Checks</a></h3> | ||||
|  | @ -524,7 +525,7 @@ their grace periods and carrying out their wakeups. | |||
| In earlier implementations, the task requesting the expedited | ||||
| grace period also drove it to completion. | ||||
| This straightforward approach had the disadvantage of needing to | ||||
| account for signals sent to user tasks, | ||||
| account for POSIX signals sent to user tasks, | ||||
| so more recent implementations use the Linux kernel's | ||||
| <a href="https://www.kernel.org/doc/Documentation/workqueue.txt">workqueues</a>. | ||||
| 
 | ||||
|  | @ -533,8 +534,8 @@ The requesting task still does counter snapshotting and funnel-lock | |||
| processing, but the task reaching the top of the funnel lock | ||||
| does a <tt>schedule_work()</tt> (from <tt>_synchronize_rcu_expedited()</tt> | ||||
| so that a workqueue kthread does the actual grace-period processing. | ||||
| Because workqueue kthreads do not accept signals, grace-period-wait | ||||
| processing need not allow for signals. | ||||
| Because workqueue kthreads do not accept POSIX signals, grace-period-wait | ||||
| processing need not allow for POSIX signals. | ||||
| 
 | ||||
| In addition, this approach allows wakeups for the previous expedited | ||||
| grace period to be overlapped with processing for the next expedited | ||||
|  | @ -586,6 +587,46 @@ blocking the current grace period are printed. | |||
| Each stall warning results in another pass through the loop, but the | ||||
| second and subsequent passes use longer stall times. | ||||
| 
 | ||||
| <h3><a name="Mid-Boot Operation">Mid-boot operation</a></h3> | ||||
| 
 | ||||
| <p> | ||||
| The use of workqueues has the advantage that the expedited | ||||
| grace-period code need not worry about POSIX signals. | ||||
| Unfortunately, it has the | ||||
| corresponding disadvantage that workqueues cannot be used until | ||||
| they are initialized, which does not happen until some time after | ||||
| the scheduler spawns the first task. | ||||
| Given that there are parts of the kernel that really do want to | ||||
| execute grace periods during this mid-boot “dead zone”, | ||||
| expedited grace periods must do something else during this time. | ||||
| 
 | ||||
| <p> | ||||
| What they do is to fall back to the old practice of requiring that the | ||||
| requesting task drive the expedited grace period, as was the case | ||||
| before the use of workqueues. | ||||
| However, the requesting task is only required to drive the grace period | ||||
| during the mid-boot dead zone. | ||||
| Before mid-boot, a synchronous grace period is a no-op. | ||||
| Some time after mid-boot, workqueues are used. | ||||
| 
 | ||||
| <p> | ||||
| Non-expedited non-SRCU synchronous grace periods must also operate | ||||
| normally during mid-boot. | ||||
| This is handled by causing non-expedited grace periods to take the | ||||
| expedited code path during mid-boot. | ||||
| 
 | ||||
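<p>
Roughly speaking, then, a synchronous grace-period request chooses among
three behaviors depending on how far boot has progressed.
The sketch below uses the kernel's <tt>rcu_scheduler_active</tt> stages,
but the helper and work-item names are illustrative rather than the
kernel's exact code:

<pre>
 1 /* Sketch of the mid-boot decision for a synchronous grace period. */
 2 if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
 3   return;                        /* Very early boot: grace period is a no-op. */
 4 else if (rcu_scheduler_active == RCU_SCHEDULER_INIT)
 5   drive_expedited_gp_directly(); /* Dead zone: requesting task drives the GP. */
 6 else
 7   schedule_work(&exp_gp_work);   /* Runtime: workqueue kthread drives the GP. */
</pre>
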
| <p> | ||||
| The current code assumes that there are no POSIX signals during | ||||
| the mid-boot dead zone. | ||||
| However, if an overwhelming need for POSIX signals somehow arises, | ||||
| appropriate adjustments can be made to the expedited stall-warning code. | ||||
| One such adjustment would reinstate the pre-workqueue stall-warning | ||||
| checks, but only during the mid-boot dead zone. | ||||
| 
 | ||||
| <p> | ||||
| With this refinement, synchronous grace periods can now be used from | ||||
| task context pretty much any time during the life of the kernel. | ||||
| 
 | ||||
| <h3><a name="Summary"> | ||||
| Summary</a></h3> | ||||
| 
 | ||||
|  |  | |||
|  | @ -659,8 +659,9 @@ systems with more than one CPU: | |||
| 	In other words, a given instance of <tt>synchronize_rcu()</tt> | ||||
| 	can avoid waiting on a given RCU read-side critical section only | ||||
| 	if it can prove that <tt>synchronize_rcu()</tt> started first. | ||||
| 	</font> | ||||
| 
 | ||||
| 	<p> | ||||
| 	<p><font color="ffffff"> | ||||
| 	A related question is “When <tt>rcu_read_lock()</tt> | ||||
| 	doesn't generate any code, why does it matter how it relates | ||||
| 	to a grace period?” | ||||
|  | @ -675,8 +676,9 @@ systems with more than one CPU: | |||
| 	within the critical section, in which case none of the accesses | ||||
| 	within the critical section may observe the effects of any | ||||
| 	access following the grace period. | ||||
| 	</font> | ||||
| 
 | ||||
| 	<p> | ||||
| 	<p><font color="ffffff"> | ||||
| 	As of late 2016, mathematical models of RCU take this | ||||
| 	viewpoint, for example, see slides 62 and 63 | ||||
| 	of the | ||||
|  | @ -1616,8 +1618,8 @@ CPUs should at least make reasonable forward progress. | |||
| In return for its shorter latencies, <tt>synchronize_rcu_expedited()</tt> | ||||
| is permitted to impose modest degradation of real-time latency | ||||
| on non-idle online CPUs. | ||||
| That said, it will likely be necessary to take further steps to reduce this | ||||
| degradation, hopefully to roughly that of a scheduling-clock interrupt. | ||||
| Here, “modest” means roughly the same latency | ||||
| degradation as a scheduling-clock interrupt. | ||||
| 
 | ||||
| <p> | ||||
| There are a number of situations where even | ||||
|  | @ -1913,12 +1915,9 @@ This requirement is another factor driving batching of grace periods, | |||
| but it is also the driving force behind the checks for large numbers | ||||
| of queued RCU callbacks in the <tt>call_rcu()</tt> code path. | ||||
| Finally, high update rates should not delay RCU read-side critical | ||||
| sections, although some read-side delays can occur when using | ||||
| sections, although some small read-side delays can occur when using | ||||
| <tt>synchronize_rcu_expedited()</tt>, courtesy of this function's use | ||||
| of <tt>try_stop_cpus()</tt>. | ||||
| (In the future, <tt>synchronize_rcu_expedited()</tt> will be | ||||
| converted to use lighter-weight inter-processor interrupts (IPIs), | ||||
| but this will still disturb readers, though to a much smaller degree.) | ||||
| of <tt>smp_call_function_single()</tt>. | ||||
| 
 | ||||
| <p> | ||||
| Although all three of these corner cases were understood in the early | ||||
|  | @ -2154,7 +2153,8 @@ as will <tt>rcu_assign_pointer()</tt>. | |||
| <p> | ||||
| Although <tt>call_rcu()</tt> may be invoked at any | ||||
| time during boot, callbacks are not guaranteed to be invoked until after | ||||
| the scheduler is fully up and running. | ||||
| all of RCU's kthreads have been spawned, which occurs at | ||||
| <tt>early_initcall()</tt> time. | ||||
| This delay in callback invocation is due to the fact that RCU does not | ||||
| invoke callbacks until it is fully initialized, and this full initialization | ||||
| cannot occur until after the scheduler has initialized itself to the | ||||
|  | @ -2167,8 +2167,10 @@ on what operations those callbacks could invoke. | |||
| Perhaps surprisingly, <tt>synchronize_rcu()</tt>, | ||||
| <a href="#Bottom-Half Flavor"><tt>synchronize_rcu_bh()</tt></a> | ||||
| (<a href="#Bottom-Half Flavor">discussed below</a>), | ||||
| and | ||||
| <a href="#Sched Flavor"><tt>synchronize_sched()</tt></a> | ||||
| <a href="#Sched Flavor"><tt>synchronize_sched()</tt></a>, | ||||
| <tt>synchronize_rcu_expedited()</tt>, | ||||
| <tt>synchronize_rcu_bh_expedited()</tt>, and | ||||
| <tt>synchronize_sched_expedited()</tt> | ||||
| will all operate normally | ||||
| during very early boot, the reason being that there is only one CPU | ||||
| and preemption is disabled. | ||||
|  | @ -2178,45 +2180,59 @@ state and thus a grace period, so the early-boot implementation can | |||
| be a no-op. | ||||
| 
 | ||||
| <p> | ||||
| Both <tt>synchronize_rcu_bh()</tt> and <tt>synchronize_sched()</tt> | ||||
| continue to operate normally through the remainder of boot, courtesy | ||||
| of the fact that preemption is disabled across their RCU read-side | ||||
| critical sections and also courtesy of the fact that there is still | ||||
| only one CPU. | ||||
| However, once the scheduler starts initializing, preemption is enabled. | ||||
| There is still only a single CPU, but the fact that preemption is enabled | ||||
| means that the no-op implementation of <tt>synchronize_rcu()</tt> no | ||||
| longer works in <tt>CONFIG_PREEMPT=y</tt> kernels. | ||||
| Therefore, as soon as the scheduler starts initializing, the early-boot | ||||
| fastpath is disabled. | ||||
| This means that <tt>synchronize_rcu()</tt> switches to its runtime | ||||
| mode of operation where it posts callbacks, which in turn means that | ||||
| any call to <tt>synchronize_rcu()</tt> will block until the corresponding | ||||
| callback is invoked. | ||||
| Unfortunately, the callback cannot be invoked until RCU's runtime | ||||
| grace-period machinery is up and running, which cannot happen until | ||||
| the scheduler has initialized itself sufficiently to allow RCU's | ||||
| kthreads to be spawned. | ||||
| Therefore, invoking <tt>synchronize_rcu()</tt> during scheduler | ||||
| initialization can result in deadlock. | ||||
| However, once the scheduler has spawned its first kthread, this early | ||||
| boot trick fails for <tt>synchronize_rcu()</tt> (as well as for | ||||
| <tt>synchronize_rcu_expedited()</tt>) in <tt>CONFIG_PREEMPT=y</tt> | ||||
| kernels. | ||||
| The reason is that an RCU read-side critical section might be preempted, | ||||
| which means that a subsequent <tt>synchronize_rcu()</tt> really does have | ||||
| to wait for something, as opposed to simply returning immediately. | ||||
| Unfortunately, <tt>synchronize_rcu()</tt> can't do this until all of | ||||
| its kthreads are spawned, which doesn't happen until some time during | ||||
| <tt>early_initcalls()</tt> time. | ||||
| But this is no excuse:  RCU is nevertheless required to correctly handle | ||||
| synchronous grace periods during this time period. | ||||
| Once all of its kthreads are up and running, RCU starts running | ||||
| normally. | ||||
| 
 | ||||
| <table> | ||||
| <tr><th> </th></tr> | ||||
| <tr><th align="left">Quick Quiz:</th></tr> | ||||
| <tr><td> | ||||
| 	So what happens with <tt>synchronize_rcu()</tt> during | ||||
| 	scheduler initialization for <tt>CONFIG_PREEMPT=n</tt> | ||||
| 	kernels? | ||||
| 	How can RCU possibly handle grace periods before all of its | ||||
| 	kthreads have been spawned??? | ||||
| </td></tr> | ||||
| <tr><th align="left">Answer:</th></tr> | ||||
| <tr><td bgcolor="#ffffff"><font color="ffffff"> | ||||
| 	In <tt>CONFIG_PREEMPT=n</tt> kernel, <tt>synchronize_rcu()</tt> | ||||
| 	maps directly to <tt>synchronize_sched()</tt>. | ||||
| 	Therefore, <tt>synchronize_rcu()</tt> works normally throughout | ||||
| 	boot in <tt>CONFIG_PREEMPT=n</tt> kernels. | ||||
| 	However, your code must also work in <tt>CONFIG_PREEMPT=y</tt> kernels, | ||||
| 	so it is still necessary to avoid invoking <tt>synchronize_rcu()</tt> | ||||
| 	during scheduler initialization. | ||||
| 	Very carefully! | ||||
| 	</font> | ||||
| 
 | ||||
| 	<p><font color="ffffff"> | ||||
| 	During the “dead zone” between the time that the | ||||
| 	scheduler spawns the first task and the time that all of RCU's | ||||
| 	kthreads have been spawned, all synchronous grace periods are | ||||
| 	handled by the expedited grace-period mechanism. | ||||
| 	At runtime, this expedited mechanism relies on workqueues, but | ||||
| 	during the dead zone the requesting task itself drives the | ||||
| 	desired expedited grace period. | ||||
| 	Because dead-zone execution takes place within task context, | ||||
| 	everything works. | ||||
| 	Once the dead zone ends, expedited grace periods go back to | ||||
| 	using workqueues, as is required to avoid problems that would | ||||
| 	otherwise occur when a user task received a POSIX signal while | ||||
| 	driving an expedited grace period. | ||||
| 	</font> | ||||
| 
 | ||||
| 	<p><font color="ffffff"> | ||||
| 	And yes, this does mean that it is unhelpful to send POSIX | ||||
| 	signals to random tasks between the time that the scheduler | ||||
| 	spawns its first kthread and the time that RCU's kthreads | ||||
| 	have all been spawned. | ||||
| 	If there ever turns out to be a good reason for sending POSIX | ||||
| 	signals during that time, appropriate adjustments will be made. | ||||
| 	(If it turns out that POSIX signals are sent during this time for | ||||
| 	no good reason, other adjustments will be made, appropriate | ||||
| 	or otherwise.) | ||||
| </font></td></tr> | ||||
| <tr><td> </td></tr> | ||||
| </table> | ||||
|  | @ -2295,12 +2311,61 @@ situation, and Dipankar Sarma incorporated <tt>rcu_barrier()</tt> into RCU. | |||
| The need for <tt>rcu_barrier()</tt> for module unloading became | ||||
| apparent later. | ||||
| 
 | ||||
| <p> | ||||
| <b>Important note</b>: The <tt>rcu_barrier()</tt> function is not, | ||||
| repeat, <i>not</i>, obligated to wait for a grace period. | ||||
| It is instead only required to wait for RCU callbacks that have | ||||
| already been posted. | ||||
| Therefore, if there are no RCU callbacks posted anywhere in the system, | ||||
| <tt>rcu_barrier()</tt> is within its rights to return immediately. | ||||
| Even if there are callbacks posted, <tt>rcu_barrier()</tt> does not | ||||
| necessarily need to wait for a grace period. | ||||
| 
 | ||||
| <table> | ||||
| <tr><th> </th></tr> | ||||
| <tr><th align="left">Quick Quiz:</th></tr> | ||||
| <tr><td> | ||||
| 	Wait a minute! | ||||
| 	Each RCU callback must wait for a grace period to complete, | ||||
| 	and <tt>rcu_barrier()</tt> must wait for each pre-existing | ||||
| 	callback to be invoked. | ||||
| 	Doesn't <tt>rcu_barrier()</tt> therefore need to wait for | ||||
| 	a full grace period if there is even one callback posted anywhere | ||||
| 	in the system? | ||||
| </td></tr> | ||||
| <tr><th align="left">Answer:</th></tr> | ||||
| <tr><td bgcolor="#ffffff"><font color="ffffff"> | ||||
| 	Absolutely not!!! | ||||
| 	</font> | ||||
| 
 | ||||
| 	<p><font color="ffffff"> | ||||
| 	Yes, each RCU callback must wait for a grace period to complete, | ||||
| 	but it might well be partly (or even completely) finished waiting | ||||
| 	by the time <tt>rcu_barrier()</tt> is invoked. | ||||
| 	In that case, <tt>rcu_barrier()</tt> need only wait for the | ||||
| 	remaining portion of the grace period to elapse. | ||||
| 	So even if there are quite a few callbacks posted, | ||||
| 	<tt>rcu_barrier()</tt> might well return quite quickly. | ||||
| 	</font> | ||||
| 
 | ||||
| 	<p><font color="ffffff"> | ||||
| 	So if you need to wait for a grace period as well as for all | ||||
| 	pre-existing callbacks, you will need to invoke both | ||||
| 	<tt>synchronize_rcu()</tt> and <tt>rcu_barrier()</tt>. | ||||
| 	If latency is a concern, you can always use workqueues | ||||
| 	to invoke them concurrently. | ||||
| </font></td></tr> | ||||
| <tr><td> </td></tr> | ||||
| </table> | ||||
| 
 | ||||
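<p>
For example, a caller that needs both a full grace period and the
invocation of all previously posted callbacks could overlap the two
waits as suggested above.
The sketch below is purely illustrative (these work items are not
provided by the kernel):

<pre>
 1 /* Illustrative only: overlap synchronize_rcu() with rcu_barrier(). */
 2 static void sync_rcu_work_fn(struct work_struct *w)    { synchronize_rcu(); }
 3 static void rcu_barrier_work_fn(struct work_struct *w) { rcu_barrier(); }
 4 static DECLARE_WORK(sync_rcu_work, sync_rcu_work_fn);
 5 static DECLARE_WORK(rcu_barrier_work, rcu_barrier_work_fn);
 6
 7 schedule_work(&sync_rcu_work);
 8 schedule_work(&rcu_barrier_work);
 9 flush_work(&sync_rcu_work);      /* The two waits now proceed concurrently. */
10 flush_work(&rcu_barrier_work);
</pre>
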
| <h3><a name="Hotplug CPU">Hotplug CPU</a></h3> | ||||
| 
 | ||||
| <p> | ||||
| The Linux kernel supports CPU hotplug, which means that CPUs | ||||
| can come and go. | ||||
| It is of course illegal to use any RCU API member from an offline CPU. | ||||
| It is of course illegal to use any RCU API member from an offline CPU, | ||||
| with the exception of <a href="#Sleepable RCU">SRCU</a> read-side | ||||
| critical sections. | ||||
| This requirement was present from day one in DYNIX/ptx, but | ||||
| on the other hand, the Linux kernel's CPU-hotplug implementation | ||||
| is “interesting.” | ||||
|  | @ -2310,19 +2375,18 @@ The Linux-kernel CPU-hotplug implementation has notifiers that | |||
| are used to allow the various kernel subsystems (including RCU) | ||||
| to respond appropriately to a given CPU-hotplug operation. | ||||
| Most RCU operations may be invoked from CPU-hotplug notifiers, | ||||
| including even normal synchronous grace-period operations | ||||
| such as <tt>synchronize_rcu()</tt>. | ||||
| However, expedited grace-period operations such as | ||||
| <tt>synchronize_rcu_expedited()</tt> are not supported, | ||||
| due to the fact that current implementations block CPU-hotplug | ||||
| operations, which could result in deadlock. | ||||
| including even synchronous grace-period operations such as | ||||
| <tt>synchronize_rcu()</tt> and <tt>synchronize_rcu_expedited()</tt>. | ||||
| 
 | ||||
| <p> | ||||
| In addition, all-callback-wait operations such as | ||||
| However, all-callback-wait operations such as | ||||
| <tt>rcu_barrier()</tt> are also not supported, due to the | ||||
| fact that there are phases of CPU-hotplug operations where | ||||
| the outgoing CPU's callbacks will not be invoked until after | ||||
| the CPU-hotplug operation ends, which could also result in deadlock. | ||||
| Furthermore, <tt>rcu_barrier()</tt> blocks CPU-hotplug operations | ||||
| during its execution, which results in another type of deadlock | ||||
| when invoked from a CPU-hotplug notifier. | ||||
| 
 | ||||
| <h3><a name="Scheduler and RCU">Scheduler and RCU</a></h3> | ||||
| 
 | ||||
|  | @ -2863,6 +2927,27 @@ It also motivates the <tt>smp_mb__after_srcu_read_unlock()</tt> | |||
| API, which, in combination with <tt>srcu_read_unlock()</tt>, | ||||
| guarantees a full memory barrier. | ||||
| 
 | ||||
| <p> | ||||
| Also unlike other RCU flavors, SRCU's callbacks-wait function | ||||
| <tt>srcu_barrier()</tt> may be invoked from CPU-hotplug notifiers, | ||||
| though this is not necessarily a good idea. | ||||
| The reason that this is possible is that SRCU is insensitive | ||||
| to whether or not a CPU is online, which means that <tt>srcu_barrier()</tt> | ||||
| need not exclude CPU-hotplug operations. | ||||
| 
 | ||||
| <p> | ||||
| As of v4.12, SRCU's callbacks are maintained per-CPU, eliminating | ||||
| a locking bottleneck present in prior kernel versions. | ||||
| Although this will allow users to put much heavier stress on | ||||
| <tt>call_srcu()</tt>, it is important to note that SRCU does not | ||||
| yet take any special steps to deal with callback flooding. | ||||
| So if you are posting (say) 10,000 SRCU callbacks per second per CPU, | ||||
| you are probably totally OK, but if you intend to post (say) 1,000,000 | ||||
| SRCU callbacks per second per CPU, please run some tests first. | ||||
| SRCU just might need a few adjustments to deal with that sort of load. | ||||
| Of course, your mileage may vary based on the speed of your CPUs and | ||||
| the size of your memory. | ||||
| 
 | ||||
| <p> | ||||
| The | ||||
| <a href="https://lwn.net/Articles/609973/#RCU Per-Flavor API Table">SRCU API</a> | ||||
|  | @ -3021,8 +3106,8 @@ to do some redesign to avoid this scalability problem. | |||
| 
 | ||||
| <p> | ||||
| RCU disables CPU hotplug in a few places, perhaps most notably in the | ||||
| expedited grace-period and <tt>rcu_barrier()</tt> operations. | ||||
| If there is a strong reason to use expedited grace periods in CPU-hotplug | ||||
| <tt>rcu_barrier()</tt> operations. | ||||
| If there is a strong reason to use <tt>rcu_barrier()</tt> in CPU-hotplug | ||||
| notifiers, it will be necessary to avoid disabling CPU hotplug. | ||||
| This would introduce some complexity, so there had better be a <i>very</i> | ||||
| good reason. | ||||
|  | @ -3096,9 +3181,5 @@ Andy Lutomirski for their help in rendering | |||
| this article human readable, and to Michelle Rankin for her support | ||||
| of this effort. | ||||
| Other contributions are acknowledged in the Linux kernel's git archive. | ||||
| The cartoon is copyright (c) 2013 by Melissa Broussard, | ||||
| and is provided | ||||
| under the terms of the Creative Commons Attribution-Share Alike 3.0 | ||||
| United States license. | ||||
| 
 | ||||
| </body></html> | ||||
|  |  | |||
|  | @ -138,6 +138,15 @@ o	Be very careful about comparing pointers obtained from | |||
| 		This sort of comparison occurs frequently when scanning | ||||
| 		RCU-protected circular linked lists. | ||||
| 
 | ||||
| 		Note that if checks for being within an RCU read-side | ||||
| 		critical section are not required and the pointer is never | ||||
| 		dereferenced, rcu_access_pointer() should be used in place | ||||
| 		of rcu_dereference(). The rcu_access_pointer() primitive | ||||
| 		does not require an enclosing read-side critical section, | ||||
| 		and also omits the smp_read_barrier_depends() included in | ||||
| 		rcu_dereference(), which in turn should provide a small | ||||
| 		performance gain in some CPUs (e.g., the DEC Alpha). | ||||
| 
 | ||||
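		For example (a sketch, in which "gp" and "default_item" are
		made-up names), a pointer that will only be compared and
		never dereferenced can be fetched with rcu_access_pointer()
		outside of any RCU read-side critical section:

			p = rcu_access_pointer(gp);
			if (p == &default_item)
				return;	/* p is never dereferenced. */
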
| 	o	The comparison is against a pointer that references memory | ||||
| 		that was initialized "a long time ago."  The reason | ||||
| 		this is safe is that even if misordering occurs, the | ||||
|  |  | |||
|  | @ -1,5 +1,5 @@ | |||
| Using hlist_nulls to protect read-mostly linked lists and | ||||
| objects using SLAB_DESTROY_BY_RCU allocations. | ||||
| objects using SLAB_TYPESAFE_BY_RCU allocations. | ||||
| 
 | ||||
| Please read the basics in Documentation/RCU/listRCU.txt | ||||
| 
 | ||||
|  | @ -7,7 +7,7 @@ Using special makers (called 'nulls') is a convenient way | |||
| to solve following problem : | ||||
| 
 | ||||
| A typical RCU linked list managing objects which are | ||||
| allocated with SLAB_DESTROY_BY_RCU kmem_cache can | ||||
| allocated with SLAB_TYPESAFE_BY_RCU kmem_cache can | ||||
| use following algos : | ||||
| 
 | ||||
| 1) Lookup algo | ||||
|  | @ -96,7 +96,7 @@ unlock_chain(); // typically a spin_unlock() | |||
| 3) Remove algo | ||||
| -------------- | ||||
| Nothing special here, we can use a standard RCU hlist deletion. | ||||
| But thanks to SLAB_DESTROY_BY_RCU, beware a deleted object can be reused | ||||
| But thanks to SLAB_TYPESAFE_BY_RCU, beware a deleted object can be reused | ||||
| very very fast (before the end of RCU grace period) | ||||
| 
 | ||||
| if (put_last_reference_on(obj)) { | ||||
|  |  | |||
|  | @ -1,9 +1,102 @@ | |||
| Using RCU's CPU Stall Detector | ||||
| 
 | ||||
| The rcu_cpu_stall_suppress module parameter enables RCU's CPU stall | ||||
| detector, which detects conditions that unduly delay RCU grace periods. | ||||
| This module parameter enables CPU stall detection by default, but | ||||
| may be overridden via boot-time parameter or at runtime via sysfs. | ||||
| This document first discusses what sorts of issues RCU's CPU stall | ||||
| detector can locate, and then discusses kernel parameters and Kconfig | ||||
| options that can be used to fine-tune the detector's operation.  Finally, | ||||
| this document explains the stall detector's "splat" format. | ||||
| 
 | ||||
| 
 | ||||
| What Causes RCU CPU Stall Warnings? | ||||
| 
 | ||||
| So your kernel printed an RCU CPU stall warning.  The next question is | ||||
| "What caused it?"  The following problems can result in RCU CPU stall | ||||
| warnings: | ||||
| 
 | ||||
| o	A CPU looping in an RCU read-side critical section. | ||||
| 
 | ||||
| o	A CPU looping with interrupts disabled. | ||||
| 
 | ||||
| o	A CPU looping with preemption disabled.  This condition can | ||||
| 	result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh | ||||
| 	stalls. | ||||
| 
 | ||||
| o	A CPU looping with bottom halves disabled.  This condition can | ||||
| 	result in RCU-sched and RCU-bh stalls. | ||||
| 
 | ||||
| o	For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the | ||||
| 	kernel without invoking schedule().  Note that cond_resched() | ||||
| 	does not necessarily prevent RCU CPU stall warnings.  Therefore, | ||||
| 	if the looping in the kernel is really expected and desirable | ||||
| 	behavior, you might need to replace some of the cond_resched() | ||||
| 	calls with calls to cond_resched_rcu_qs() (see the sketch | ||||
| 	just after this list). | ||||
| 
 | ||||
| o	Booting Linux using a console connection that is too slow to | ||||
| 	keep up with the boot-time console-message rate.  For example, | ||||
| 	a 115Kbaud serial console can be -way- too slow to keep up | ||||
| 	with boot-time message rates, and will frequently result in | ||||
| 	RCU CPU stall warning messages.  Especially if you have added | ||||
| 	debug printk()s. | ||||
| 
 | ||||
| o	Anything that prevents RCU's grace-period kthreads from running. | ||||
| 	This can result in the "All QSes seen" console-log message. | ||||
| 	This message will include information on when the kthread last | ||||
| 	ran and how often it should be expected to run. | ||||
| 
 | ||||
| o	A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might | ||||
| 	happen to preempt a low-priority task in the middle of an RCU | ||||
| 	read-side critical section.   This is especially damaging if | ||||
| 	that low-priority task is not permitted to run on any other CPU, | ||||
| 	in which case the next RCU grace period can never complete, which | ||||
| 	will eventually cause the system to run out of memory and hang. | ||||
| 	While the system is in the process of running itself out of | ||||
| 	memory, you might see stall-warning messages. | ||||
| 
 | ||||
| o	A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that | ||||
| 	is running at a higher priority than the RCU softirq threads. | ||||
| 	This will prevent RCU callbacks from ever being invoked, | ||||
| 	and in a CONFIG_PREEMPT_RCU kernel will further prevent | ||||
| 	RCU grace periods from ever completing.  Either way, the | ||||
| 	system will eventually run out of memory and hang.  In the | ||||
| 	CONFIG_PREEMPT_RCU case, you might see stall-warning | ||||
| 	messages. | ||||
| 
 | ||||
| o	A hardware or software issue shuts off the scheduler-clock | ||||
| 	interrupt on a CPU that is not in dyntick-idle mode.  This | ||||
| 	problem really has happened, and seems to be most likely to | ||||
| 	result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels. | ||||
| 
 | ||||
| o	A bug in the RCU implementation. | ||||
| 
 | ||||
| o	A hardware failure.  This is quite unlikely, but has occurred | ||||
| 	at least once in real life.  A CPU failed in a running system, | ||||
| 	becoming unresponsive, but not causing an immediate crash. | ||||
| 	This resulted in a series of RCU CPU stall warnings, eventually | ||||
| 	leading to the realization that the CPU had failed. | ||||
| 
 | ||||
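Expanding on the cond_resched() item above, the following sketch (with
a made-up do_chunk_of_work() function) shows the sort of long-running
loop where cond_resched_rcu_qs() is preferable, because it both yields
the CPU and reports a quiescent state to RCU:

	for (i = 0; i < nr_chunks; i++) {
		do_chunk_of_work(i);	/* Long-running kernel-mode work. */
		cond_resched_rcu_qs();	/* Yield and report a quiescent state. */
	}
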
| The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall | ||||
| warnings.  Note that SRCU does -not- have CPU stall warnings.  Please note | ||||
| that RCU only detects CPU stalls when there is a grace period in progress. | ||||
| No grace period, no CPU stall warnings. | ||||
| 
 | ||||
| To diagnose the cause of the stall, inspect the stack traces. | ||||
| The offending function will usually be near the top of the stack. | ||||
| If you have a series of stall warnings from a single extended stall, | ||||
| comparing the stack traces can often help determine where the stall | ||||
| is occurring, which will usually be in the function nearest the top of | ||||
| that portion of the stack which remains the same from trace to trace. | ||||
| If you can reliably trigger the stall, ftrace can be quite helpful. | ||||
| 
 | ||||
| RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE | ||||
| and with RCU's event tracing.  For information on RCU's event tracing, | ||||
| see include/trace/events/rcu.h. | ||||
| 
 | ||||
| 
 | ||||
| Fine-Tuning the RCU CPU Stall Detector | ||||
| 
 | ||||
| The rcuupdate.rcu_cpu_stall_suppress module parameter disables RCU's | ||||
| CPU stall detector, which detects conditions that unduly delay RCU grace | ||||
| periods.  This module parameter enables CPU stall detection by default, | ||||
| but may be overridden via boot-time parameter or at runtime via sysfs. | ||||
| The stall detector's idea of what constitutes "unduly delayed" is | ||||
| controlled by a set of kernel configuration variables and cpp macros: | ||||
| 
 | ||||
|  | @ -56,6 +149,9 @@ rcupdate.rcu_task_stall_timeout | |||
| 	And continues with the output of sched_show_task() for each | ||||
| 	task stalling the current RCU-tasks grace period. | ||||
| 
 | ||||
| 
 | ||||
| Interpreting RCU's CPU Stall-Detector "Splats" | ||||
| 
 | ||||
| For non-RCU-tasks flavors of RCU, when a CPU detects that it is stalling, | ||||
| it will print a message similar to the following: | ||||
| 
 | ||||
|  | @ -178,89 +274,3 @@ grace period is in flight. | |||
| 
 | ||||
| It is entirely possible to see stall warnings from normal and from | ||||
| expedited grace periods at about the same time from the same run. | ||||
| 
 | ||||
| 
 | ||||
| What Causes RCU CPU Stall Warnings? | ||||
| 
 | ||||
| So your kernel printed an RCU CPU stall warning.  The next question is | ||||
| "What caused it?"  The following problems can result in RCU CPU stall | ||||
| warnings: | ||||
| 
 | ||||
| o	A CPU looping in an RCU read-side critical section. | ||||
| 	 | ||||
| o	A CPU looping with interrupts disabled.  This condition can | ||||
| 	result in RCU-sched and RCU-bh stalls. | ||||
| 
 | ||||
| o	A CPU looping with preemption disabled.  This condition can | ||||
| 	result in RCU-sched stalls and, if ksoftirqd is in use, RCU-bh | ||||
| 	stalls. | ||||
| 
 | ||||
| o	A CPU looping with bottom halves disabled.  This condition can | ||||
| 	result in RCU-sched and RCU-bh stalls. | ||||
| 
 | ||||
| o	For !CONFIG_PREEMPT kernels, a CPU looping anywhere in the | ||||
| 	kernel without invoking schedule().  Note that cond_resched() | ||||
| 	does not necessarily prevent RCU CPU stall warnings.  Therefore, | ||||
| 	if the looping in the kernel is really expected and desirable | ||||
| 	behavior, you might need to replace some of the cond_resched() | ||||
| 	calls with calls to cond_resched_rcu_qs(). | ||||
| 
 | ||||
| o	Booting Linux using a console connection that is too slow to | ||||
| 	keep up with the boot-time console-message rate.  For example, | ||||
| 	a 115Kbaud serial console can be -way- too slow to keep up | ||||
| 	with boot-time message rates, and will frequently result in | ||||
| 	RCU CPU stall warning messages.  Especially if you have added | ||||
| 	debug printk()s. | ||||
| 
 | ||||
| o	Anything that prevents RCU's grace-period kthreads from running. | ||||
| 	This can result in the "All QSes seen" console-log message. | ||||
| 	This message will include information on when the kthread last | ||||
| 	ran and how often it should be expected to run. | ||||
| 
 | ||||
| o	A CPU-bound real-time task in a CONFIG_PREEMPT kernel, which might | ||||
| 	happen to preempt a low-priority task in the middle of an RCU | ||||
| 	read-side critical section.   This is especially damaging if | ||||
| 	that low-priority task is not permitted to run on any other CPU, | ||||
| 	in which case the next RCU grace period can never complete, which | ||||
| 	will eventually cause the system to run out of memory and hang. | ||||
| 	While the system is in the process of running itself out of | ||||
| 	memory, you might see stall-warning messages. | ||||
| 
 | ||||
| o	A CPU-bound real-time task in a CONFIG_PREEMPT_RT kernel that | ||||
| 	is running at a higher priority than the RCU softirq threads. | ||||
| 	This will prevent RCU callbacks from ever being invoked, | ||||
| 	and in a CONFIG_PREEMPT_RCU kernel will further prevent | ||||
| 	RCU grace periods from ever completing.  Either way, the | ||||
| 	system will eventually run out of memory and hang.  In the | ||||
| 	CONFIG_PREEMPT_RCU case, you might see stall-warning | ||||
| 	messages. | ||||
| 
 | ||||
| o	A hardware or software issue shuts off the scheduler-clock | ||||
| 	interrupt on a CPU that is not in dyntick-idle mode.  This | ||||
| 	problem really has happened, and seems to be most likely to | ||||
| 	result in RCU CPU stall warnings for CONFIG_NO_HZ_COMMON=n kernels. | ||||
| 
 | ||||
| o	A bug in the RCU implementation. | ||||
| 
 | ||||
| o	A hardware failure.  This is quite unlikely, but has occurred | ||||
| 	at least once in real life.  A CPU failed in a running system, | ||||
| 	becoming unresponsive, but not causing an immediate crash. | ||||
| 	This resulted in a series of RCU CPU stall warnings, eventually | ||||
| 	leading the realization that the CPU had failed. | ||||
| 
 | ||||
| The RCU, RCU-sched, RCU-bh, and RCU-tasks implementations have CPU stall | ||||
| warning.  Note that SRCU does -not- have CPU stall warnings.  Please note | ||||
| that RCU only detects CPU stalls when there is a grace period in progress. | ||||
| No grace period, no CPU stall warnings. | ||||
| 
 | ||||
| To diagnose the cause of the stall, inspect the stack traces. | ||||
| The offending function will usually be near the top of the stack. | ||||
| If you have a series of stall warnings from a single extended stall, | ||||
| comparing the stack traces can often help determine where the stall | ||||
| is occurring, which will usually be in the function nearest the top of | ||||
| that portion of the stack which remains the same from trace to trace. | ||||
| If you can reliably trigger the stall, ftrace can be quite helpful. | ||||
| 
 | ||||
| RCU bugs can often be debugged with the help of CONFIG_RCU_TRACE | ||||
| and with RCU's event tracing.  For information on RCU's event tracing, | ||||
| see include/trace/events/rcu.h. | ||||
|  |  | |||
|  | @ -562,7 +562,9 @@ This section presents a "toy" RCU implementation that is based on | |||
| familiar locking primitives.  Its overhead makes it a non-starter for | ||||
| real-life use, as does its lack of scalability.  It is also unsuitable | ||||
| for realtime use, since it allows scheduling latency to "bleed" from | ||||
| one read-side critical section to another. | ||||
| one read-side critical section to another.  It also assumes recursive | ||||
| reader-writer locks:  If you try this with non-recursive locks, and | ||||
| you allow nested rcu_read_lock() calls, you can deadlock. | ||||
| 
 | ||||
| However, it is probably the easiest implementation to relate to, so is | ||||
| a good starting point. | ||||
|  | @ -587,20 +589,21 @@ It is extremely simple: | |||
| 		write_unlock(&rcu_gp_mutex); | ||||
| 	} | ||||
| 
 | ||||
| [You can ignore rcu_assign_pointer() and rcu_dereference() without | ||||
| missing much.  But here they are anyway.  And whatever you do, don't | ||||
| forget about them when submitting patches making use of RCU!] | ||||
| [You can ignore rcu_assign_pointer() and rcu_dereference() without missing | ||||
| much.  But here are simplified versions anyway.  And whatever you do, | ||||
| don't forget about them when submitting patches making use of RCU!] | ||||
| 
 | ||||
| 	#define rcu_assign_pointer(p, v)	({ \ | ||||
| 							smp_wmb(); \ | ||||
| 							(p) = (v); \ | ||||
| 						}) | ||||
| 	#define rcu_assign_pointer(p, v) \ | ||||
| 	({ \ | ||||
| 		smp_store_release(&(p), (v)); \ | ||||
| 	}) | ||||
| 
 | ||||
| 	#define rcu_dereference(p)     ({ \ | ||||
| 					typeof(p) _________p1 = p; \ | ||||
| 					smp_read_barrier_depends(); \ | ||||
| 					(_________p1); \ | ||||
| 					}) | ||||
| 	#define rcu_dereference(p) \ | ||||
| 	({ \ | ||||
| 		typeof(p) _________p1 = p; \ | ||||
| 		smp_read_barrier_depends(); \ | ||||
| 		(_________p1); \ | ||||
| 	}) | ||||
| 
 | ||||
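For illustration, here is a minimal usage sketch (gp, struct foo, and
do_something_with() are made-up names) showing how an updater and a
reader pair these primitives:

	struct foo {
		int a;
	};
	static struct foo __rcu *gp;

	/* Updater. */
	p = kmalloc(sizeof(*p), GFP_KERNEL);
	p->a = 1;
	rcu_assign_pointer(gp, p);

	/* Reader. */
	rcu_read_lock();
	q = rcu_dereference(gp);
	if (q != NULL)
		do_something_with(q->a);
	rcu_read_unlock();
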
| 
 | ||||
| The rcu_read_lock() and rcu_read_unlock() primitive read-acquire | ||||
|  | @ -925,7 +928,8 @@ d.	Do you need RCU grace periods to complete even in the face | |||
| 
 | ||||
| e.	Is your workload too update-intensive for normal use of | ||||
| 	RCU, but inappropriate for other synchronization mechanisms? | ||||
| 	If so, consider SLAB_DESTROY_BY_RCU.  But please be careful! | ||||
| 	If so, consider SLAB_TYPESAFE_BY_RCU (which was originally | ||||
| 	named SLAB_DESTROY_BY_RCU).  But please be careful! | ||||
| 
 | ||||
| f.	Do you need read-side critical sections that are respected | ||||
| 	even though they are in the middle of the idle loop, during | ||||
|  |  | |||
|  | @ -768,7 +768,7 @@ equal to zero, in which case the compiler is within its rights to | |||
| transform the above code into the following: | ||||
| 
 | ||||
| 	q = READ_ONCE(a); | ||||
| 	WRITE_ONCE(b, 1); | ||||
| 	WRITE_ONCE(b, 2); | ||||
| 	do_something_else(); | ||||
| 
 | ||||
| Given this transformation, the CPU is not required to respect the ordering | ||||
|  |  | |||
|  | @ -320,6 +320,9 @@ config HAVE_CMPXCHG_LOCAL | |||
| config HAVE_CMPXCHG_DOUBLE | ||||
| 	bool | ||||
| 
 | ||||
| config ARCH_WEAK_RELEASE_ACQUIRE | ||||
| 	bool | ||||
| 
 | ||||
| config ARCH_WANT_IPC_PARSE_VERSION | ||||
| 	bool | ||||
| 
 | ||||
|  |  | |||
|  | @ -99,6 +99,7 @@ config PPC | |||
| 	select ARCH_USE_BUILTIN_BSWAP | ||||
| 	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64 | ||||
| 	select ARCH_WANT_IPC_PARSE_VERSION | ||||
| 	select ARCH_WEAK_RELEASE_ACQUIRE | ||||
| 	select BINFMT_ELF | ||||
| 	select BUILDTIME_EXTABLE_SORT | ||||
| 	select CLONE_BACKWARDS | ||||
|  |  | |||
|  | @ -4665,7 +4665,7 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) | |||
| 	dev_priv->requests = KMEM_CACHE(drm_i915_gem_request, | ||||
| 					SLAB_HWCACHE_ALIGN | | ||||
| 					SLAB_RECLAIM_ACCOUNT | | ||||
| 					SLAB_DESTROY_BY_RCU); | ||||
| 					SLAB_TYPESAFE_BY_RCU); | ||||
| 	if (!dev_priv->requests) | ||||
| 		goto err_vmas; | ||||
| 
 | ||||
|  |  | |||
|  | @ -493,7 +493,7 @@ static inline struct drm_i915_gem_request * | |||
| __i915_gem_active_get_rcu(const struct i915_gem_active *active) | ||||
| { | ||||
| 	/* Performing a lockless retrieval of the active request is super
 | ||||
| 	 * tricky. SLAB_DESTROY_BY_RCU merely guarantees that the backing | ||||
| 	 * tricky. SLAB_TYPESAFE_BY_RCU merely guarantees that the backing | ||||
| 	 * slab of request objects will not be freed whilst we hold the | ||||
| 	 * RCU read lock. It does not guarantee that the request itself | ||||
| 	 * will not be freed and then *reused*. Viz, | ||||
|  |  | |||
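A minimal sketch of the lookup discipline that comment describes, using
hypothetical names (struct obj, slots[], NR_SLOTS, and obj_release()); the
essential steps are taking a reference that can fail and then re-checking
identity, because a SLAB_TYPESAFE_BY_RCU object may be freed and reused
while the RCU read lock is held:

	struct obj {
		struct kref ref;
		unsigned long key;
	};

	/* Objects come from a SLAB_TYPESAFE_BY_RCU cache. */
	static struct obj __rcu *slots[NR_SLOTS];

	static struct obj *obj_lookup(unsigned long key)
	{
		struct obj *obj;

		rcu_read_lock();
		obj = rcu_dereference(slots[key]);
		if (obj && !kref_get_unless_zero(&obj->ref))
			obj = NULL;	/* Object is in the middle of being freed. */
		if (obj && obj->key != key) {
			/* Object was freed and type-safely reused; drop it. */
			kref_put(&obj->ref, obj_release);
			obj = NULL;
		}
		rcu_read_unlock();
		return obj;		/* NULL, or a counted reference. */
	}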
|  | @ -1071,7 +1071,7 @@ int ldlm_init(void) | |||
| 	ldlm_lock_slab = kmem_cache_create("ldlm_locks", | ||||
| 					   sizeof(struct ldlm_lock), 0, | ||||
| 					   SLAB_HWCACHE_ALIGN | | ||||
| 					   SLAB_DESTROY_BY_RCU, NULL); | ||||
| 					   SLAB_TYPESAFE_BY_RCU, NULL); | ||||
| 	if (!ldlm_lock_slab) { | ||||
| 		kmem_cache_destroy(ldlm_resource_slab); | ||||
| 		return -ENOMEM; | ||||
|  |  | |||
|  | @ -2340,7 +2340,7 @@ static int jbd2_journal_init_journal_head_cache(void) | |||
| 	jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head", | ||||
| 				sizeof(struct journal_head), | ||||
| 				0,		/* offset */ | ||||
| 				SLAB_TEMPORARY | SLAB_DESTROY_BY_RCU, | ||||
| 				SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU, | ||||
| 				NULL);		/* ctor */ | ||||
| 	retval = 0; | ||||
| 	if (!jbd2_journal_head_cache) { | ||||
|  |  | |||
|  | @ -38,7 +38,7 @@ void signalfd_cleanup(struct sighand_struct *sighand) | |||
| 	/*
 | ||||
| 	 * The lockless check can race with remove_wait_queue() in progress, | ||||
| 	 * but in this case its caller should run under rcu_read_lock() and | ||||
| 	 * sighand_cachep is SLAB_DESTROY_BY_RCU, we can safely return. | ||||
| 	 * sighand_cachep is SLAB_TYPESAFE_BY_RCU, we can safely return. | ||||
| 	 */ | ||||
| 	if (likely(!waitqueue_active(wqh))) | ||||
| 		return; | ||||
|  |  | |||
|  | @ -229,7 +229,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence) | |||
|  * | ||||
|  * Function returns NULL if no refcount could be obtained, or the fence. | ||||
|  * This function handles acquiring a reference to a fence that may be | ||||
|  * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU), | ||||
|  * reallocated within the RCU grace period (such as with SLAB_TYPESAFE_BY_RCU), | ||||
|  * so long as the caller is using RCU on the pointer to the fence. | ||||
|  * | ||||
|  * An alternative mechanism is to employ a seqlock to protect a bunch of | ||||
|  | @ -257,7 +257,7 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep) | |||
| 		 * have successfully acquired a reference to it. If it no | ||||
| 		 * longer matches, we are holding a reference to some other | ||||
| 		 * reallocated pointer. This is possible if the allocator | ||||
| 		 * is using a freelist like SLAB_DESTROY_BY_RCU where the | ||||
| 		 * is using a freelist like SLAB_TYPESAFE_BY_RCU where the | ||||
| 		 * fence remains valid for the RCU grace period, but it | ||||
| 		 * may be reallocated. When using such allocators, we are | ||||
| 		 * responsible for ensuring the reference we get is to | ||||
|  |  | |||
|  | @ -375,8 +375,6 @@ struct kvm { | |||
| 	struct mutex slots_lock; | ||||
| 	struct mm_struct *mm; /* userspace tied to this vm */ | ||||
| 	struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; | ||||
| 	struct srcu_struct srcu; | ||||
| 	struct srcu_struct irq_srcu; | ||||
| 	struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; | ||||
| 
 | ||||
| 	/*
 | ||||
|  | @ -429,6 +427,8 @@ struct kvm { | |||
| 	struct list_head devices; | ||||
| 	struct dentry *debugfs_dentry; | ||||
| 	struct kvm_stat_data **debugfs_stat_data; | ||||
| 	struct srcu_struct srcu; | ||||
| 	struct srcu_struct irq_srcu; | ||||
| }; | ||||
| 
 | ||||
| #define kvm_err(fmt, ...) \ | ||||
|  |  | |||
							
								
								
									
99  include/linux/rcu_node_tree.h  (new file)
							|  | @ -0,0 +1,99 @@ | |||
| /*
 | ||||
|  * RCU node combining tree definitions.  These are used to compute | ||||
|  * global attributes while avoiding common-case global contention.  A key | ||||
|  * property that these computations rely on is a tournament-style approach | ||||
|  * where only one of the tasks contending a lower level in the tree need | ||||
|  * advance to the next higher level.  If properly configured, this allows | ||||
|  * unlimited scalability while maintaining a constant level of contention | ||||
|  * on the root node. | ||||
|  * | ||||
|  * This program is free software; you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation; either version 2 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * This program is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with this program; if not, you can access it online at | ||||
|  * http://www.gnu.org/licenses/gpl-2.0.html.
 | ||||
|  * | ||||
|  * Copyright IBM Corporation, 2017 | ||||
|  * | ||||
|  * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||||
|  */ | ||||
| 
 | ||||
| #ifndef __LINUX_RCU_NODE_TREE_H | ||||
| #define __LINUX_RCU_NODE_TREE_H | ||||
| 
 | ||||
| /*
 | ||||
|  * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and | ||||
|  * CONFIG_RCU_FANOUT_LEAF. | ||||
|  * In theory, it should be possible to add more levels straightforwardly. | ||||
|  * In practice, this did work well going from three levels to four. | ||||
|  * Of course, your mileage may vary. | ||||
|  */ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_FANOUT | ||||
| #define RCU_FANOUT CONFIG_RCU_FANOUT | ||||
| #else /* #ifdef CONFIG_RCU_FANOUT */ | ||||
| # ifdef CONFIG_64BIT | ||||
| # define RCU_FANOUT 64 | ||||
| # else | ||||
| # define RCU_FANOUT 32 | ||||
| # endif | ||||
| #endif /* #else #ifdef CONFIG_RCU_FANOUT */ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_FANOUT_LEAF | ||||
| #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF | ||||
| #else /* #ifdef CONFIG_RCU_FANOUT_LEAF */ | ||||
| #define RCU_FANOUT_LEAF 16 | ||||
| #endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */ | ||||
| 
 | ||||
| #define RCU_FANOUT_1	      (RCU_FANOUT_LEAF) | ||||
| #define RCU_FANOUT_2	      (RCU_FANOUT_1 * RCU_FANOUT) | ||||
| #define RCU_FANOUT_3	      (RCU_FANOUT_2 * RCU_FANOUT) | ||||
| #define RCU_FANOUT_4	      (RCU_FANOUT_3 * RCU_FANOUT) | ||||
| 
 | ||||
| #if NR_CPUS <= RCU_FANOUT_1 | ||||
| #  define RCU_NUM_LVLS	      1 | ||||
| #  define NUM_RCU_LVL_0	      1 | ||||
| #  define NUM_RCU_NODES	      NUM_RCU_LVL_0 | ||||
| #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 } | ||||
| #  define RCU_NODE_NAME_INIT  { "rcu_node_0" } | ||||
| #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" } | ||||
| #elif NR_CPUS <= RCU_FANOUT_2 | ||||
| #  define RCU_NUM_LVLS	      2 | ||||
| #  define NUM_RCU_LVL_0	      1 | ||||
| #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | ||||
| #  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1) | ||||
| #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 } | ||||
| #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" } | ||||
| #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" } | ||||
| #elif NR_CPUS <= RCU_FANOUT_3 | ||||
| #  define RCU_NUM_LVLS	      3 | ||||
| #  define NUM_RCU_LVL_0	      1 | ||||
| #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) | ||||
| #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | ||||
| #  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2) | ||||
| #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 } | ||||
| #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" } | ||||
| #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" } | ||||
| #elif NR_CPUS <= RCU_FANOUT_4 | ||||
| #  define RCU_NUM_LVLS	      4 | ||||
| #  define NUM_RCU_LVL_0	      1 | ||||
| #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) | ||||
| #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) | ||||
| #  define NUM_RCU_LVL_3	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | ||||
| #  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) | ||||
| #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 } | ||||
| #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" } | ||||
| #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" } | ||||
| #else | ||||
| # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" | ||||
| #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ | ||||
| 
 | ||||
| #endif /* __LINUX_RCU_NODE_TREE_H */ | ||||
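As a worked example of the shape these macros produce, take the 64-bit
defaults (RCU_FANOUT = 64, RCU_FANOUT_LEAF = 16) and a hypothetical
NR_CPUS of 4096:

	RCU_FANOUT_1 = 16,  RCU_FANOUT_2 = 1024,  RCU_FANOUT_3 = 65536
	1024 < 4096 <= 65536, so RCU_NUM_LVLS = 3
	NUM_RCU_LVL_1 = DIV_ROUND_UP(4096, 1024) = 4
	NUM_RCU_LVL_2 = DIV_ROUND_UP(4096, 16)   = 256
	NUM_RCU_NODES = 1 + 4 + 256 = 261

That is, a single root node fans out to 4 intermediate nodes, which in
turn fan out to 256 leaf nodes covering at most 16 CPUs apiece.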
							
								
								
									
712  include/linux/rcu_segcblist.h  (new file)
							|  | @ -0,0 +1,712 @@ | |||
| /*
 | ||||
|  * RCU segmented callback lists | ||||
|  * | ||||
|  * This program is free software; you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation; either version 2 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * This program is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with this program; if not, you can access it online at | ||||
|  * http://www.gnu.org/licenses/gpl-2.0.html.
 | ||||
|  * | ||||
|  * Copyright IBM Corporation, 2017 | ||||
|  * | ||||
|  * Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com> | ||||
|  */ | ||||
| 
 | ||||
| #ifndef __KERNEL_RCU_SEGCBLIST_H | ||||
| #define __KERNEL_RCU_SEGCBLIST_H | ||||
| 
 | ||||
| /* Simple unsegmented callback lists. */ | ||||
| struct rcu_cblist { | ||||
| 	struct rcu_head *head; | ||||
| 	struct rcu_head **tail; | ||||
| 	long len; | ||||
| 	long len_lazy; | ||||
| }; | ||||
| 
 | ||||
| #define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head } | ||||
| 
 | ||||
| /* Initialize simple callback list. */ | ||||
| static inline void rcu_cblist_init(struct rcu_cblist *rclp) | ||||
| { | ||||
| 	rclp->head = NULL; | ||||
| 	rclp->tail = &rclp->head; | ||||
| 	rclp->len = 0; | ||||
| 	rclp->len_lazy = 0; | ||||
| } | ||||
| 
 | ||||
| /* Is simple callback list empty? */ | ||||
| static inline bool rcu_cblist_empty(struct rcu_cblist *rclp) | ||||
| { | ||||
| 	return !rclp->head; | ||||
| } | ||||
| 
 | ||||
| /* Return number of callbacks in simple callback list. */ | ||||
| static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp) | ||||
| { | ||||
| 	return rclp->len; | ||||
| } | ||||
| 
 | ||||
| /* Return number of lazy callbacks in simple callback list. */ | ||||
| static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp) | ||||
| { | ||||
| 	return rclp->len_lazy; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Debug function to actually count the number of callbacks. | ||||
|  * If the number exceeds the limit specified, return -1. | ||||
|  */ | ||||
| static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim) | ||||
| { | ||||
| 	int cnt = 0; | ||||
| 	struct rcu_head **rhpp = &rclp->head; | ||||
| 
 | ||||
| 	for (;;) { | ||||
| 		if (!*rhpp) | ||||
| 			return cnt; | ||||
| 		if (++cnt > lim) | ||||
| 			return -1; | ||||
| 		rhpp = &(*rhpp)->next; | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Dequeue the oldest rcu_head structure from the specified callback | ||||
|  * list.  This function assumes that the callback is non-lazy, but | ||||
|  * the caller can later invoke rcu_cblist_dequeued_lazy() if it | ||||
|  * finds otherwise (and if it cares about laziness).  This allows | ||||
|  * different users to have different ways of determining laziness. | ||||
|  */ | ||||
| static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp) | ||||
| { | ||||
| 	struct rcu_head *rhp; | ||||
| 
 | ||||
| 	rhp = rclp->head; | ||||
| 	if (!rhp) | ||||
| 		return NULL; | ||||
| 	rclp->len--; | ||||
| 	rclp->head = rhp->next; | ||||
| 	if (!rclp->head) | ||||
| 		rclp->tail = &rclp->head; | ||||
| 	return rhp; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Account for the fact that a previously dequeued callback turned out | ||||
|  * to be marked as lazy. | ||||
|  */ | ||||
| static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp) | ||||
| { | ||||
| 	rclp->len_lazy--; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Interim function to return rcu_cblist head pointer.  Longer term, the | ||||
|  * rcu_cblist will be used more pervasively, removing the need for this | ||||
|  * function. | ||||
|  */ | ||||
| static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp) | ||||
| { | ||||
| 	return rclp->head; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Interim function to return rcu_cblist tail pointer.  Longer term, the | ||||
|  * rcu_cblist will be used more pervasively, removing the need for this | ||||
|  * function. | ||||
|  */ | ||||
| static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp) | ||||
| { | ||||
| 	WARN_ON_ONCE(rcu_cblist_empty(rclp)); | ||||
| 	return rclp->tail; | ||||
| } | ||||
| 
 | ||||
| /* Complicated segmented callback lists.  ;-) */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Index values for segments in rcu_segcblist structure. | ||||
|  * | ||||
|  * The segments are as follows: | ||||
|  * | ||||
|  * [head, *tails[RCU_DONE_TAIL]): | ||||
|  *	Callbacks whose grace period has elapsed, and thus can be invoked. | ||||
|  * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]): | ||||
|  *	Callbacks waiting for the current GP from the current CPU's viewpoint. | ||||
|  * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]): | ||||
|  *	Callbacks that arrived before the next GP started, again from | ||||
|  *	the current CPU's viewpoint.  These can be handled by the next GP. | ||||
|  * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]): | ||||
|  *	Callbacks that might have arrived after the next GP started. | ||||
|  *	There is some uncertainty as to when a given GP starts and | ||||
|  *	ends, but a CPU knows the exact times if it is the one starting | ||||
|  *	or ending the GP.  Other CPUs know that the previous GP ends | ||||
|  *	before the next one starts. | ||||
|  * | ||||
|  * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also | ||||
|  * empty. | ||||
|  * | ||||
|  * The ->gp_seq[] array contains the grace-period number at which the | ||||
|  * corresponding segment of callbacks will be ready to invoke.  A given | ||||
|  * element of this array is meaningful only when the corresponding segment | ||||
|  * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks | ||||
|  * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have | ||||
|  * not yet been assigned a grace-period number). | ||||
|  */ | ||||
| #define RCU_DONE_TAIL		0	/* Also RCU_WAIT head. */ | ||||
| #define RCU_WAIT_TAIL		1	/* Also RCU_NEXT_READY head. */ | ||||
| #define RCU_NEXT_READY_TAIL	2	/* Also RCU_NEXT head. */ | ||||
| #define RCU_NEXT_TAIL		3 | ||||
| #define RCU_CBLIST_NSEGS	4 | ||||
| 
 | ||||
| struct rcu_segcblist { | ||||
| 	struct rcu_head *head; | ||||
| 	struct rcu_head **tails[RCU_CBLIST_NSEGS]; | ||||
| 	unsigned long gp_seq[RCU_CBLIST_NSEGS]; | ||||
| 	long len; | ||||
| 	long len_lazy; | ||||
| }; | ||||
| 
 | ||||
| #define RCU_SEGCBLIST_INITIALIZER(n) \ | ||||
| { \ | ||||
| 	.head = NULL, \ | ||||
| 	.tails[RCU_DONE_TAIL] = &n.head, \ | ||||
| 	.tails[RCU_WAIT_TAIL] = &n.head, \ | ||||
| 	.tails[RCU_NEXT_READY_TAIL] = &n.head, \ | ||||
| 	.tails[RCU_NEXT_TAIL] = &n.head, \ | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Initialize an rcu_segcblist structure. | ||||
|  */ | ||||
| static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq)); | ||||
| 	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq)); | ||||
| 	rsclp->head = NULL; | ||||
| 	for (i = 0; i < RCU_CBLIST_NSEGS; i++) | ||||
| 		rsclp->tails[i] = &rsclp->head; | ||||
| 	rsclp->len = 0; | ||||
| 	rsclp->len_lazy = 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Is the specified rcu_segcblist structure empty? | ||||
|  * | ||||
|  * But careful!  The fact that the ->head field is NULL does not | ||||
|  * necessarily imply that there are no callbacks associated with | ||||
|  * this structure.  When callbacks are being invoked, they are | ||||
|  * removed as a group.  If callback invocation must be preempted, | ||||
|  * the remaining callbacks will be added back to the list.  Either | ||||
|  * way, the counts are updated later. | ||||
|  * | ||||
|  * So it is often the case that rcu_segcblist_n_cbs() should be used | ||||
|  * instead. | ||||
|  */ | ||||
| static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	return !rsclp->head; | ||||
| } | ||||
| 
 | ||||
| /* Return number of callbacks in segmented callback list. */ | ||||
| static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	return READ_ONCE(rsclp->len); | ||||
| } | ||||
| 
 | ||||
| /* Return number of lazy callbacks in segmented callback list. */ | ||||
| static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	return rsclp->len_lazy; | ||||
| } | ||||
| 
 | ||||
| /* Return number of non-lazy callbacks in segmented callback list. */ | ||||
| static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	return rsclp->len - rsclp->len_lazy; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Is the specified rcu_segcblist enabled, for example, not corresponding | ||||
|  * to an offline or callback-offloaded CPU? | ||||
|  */ | ||||
| static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	return !!rsclp->tails[RCU_NEXT_TAIL]; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Disable the specified rcu_segcblist structure, so that callbacks can | ||||
|  * no longer be posted to it.  This structure must be empty. | ||||
|  */ | ||||
| static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp)); | ||||
| 	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp)); | ||||
| 	WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp)); | ||||
| 	rsclp->tails[RCU_NEXT_TAIL] = NULL; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Is the specified segment of the specified rcu_segcblist structure | ||||
|  * empty of callbacks? | ||||
|  */ | ||||
| static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg) | ||||
| { | ||||
| 	if (seg == RCU_DONE_TAIL) | ||||
| 		return &rsclp->head == rsclp->tails[RCU_DONE_TAIL]; | ||||
| 	return rsclp->tails[seg - 1] == rsclp->tails[seg]; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Are all segments following the specified segment of the specified | ||||
|  * rcu_segcblist structure empty of callbacks?  (The specified | ||||
|  * segment might well contain callbacks.) | ||||
|  */ | ||||
| static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg) | ||||
| { | ||||
| 	return !*rsclp->tails[seg]; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Does the specified rcu_segcblist structure contain callbacks that | ||||
|  * are ready to be invoked? | ||||
|  */ | ||||
| static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	return rcu_segcblist_is_enabled(rsclp) && | ||||
| 	       &rsclp->head != rsclp->tails[RCU_DONE_TAIL]; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Does the specified rcu_segcblist structure contain callbacks that | ||||
|  * are still pending, that is, not yet ready to be invoked? | ||||
|  */ | ||||
| static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	return rcu_segcblist_is_enabled(rsclp) && | ||||
| 	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Dequeue and return the first ready-to-invoke callback.  If there | ||||
|  * are no ready-to-invoke callbacks, return NULL.  Disables interrupts | ||||
|  * to avoid interference.  Does not protect from interference from other | ||||
|  * CPUs or tasks. | ||||
|  */ | ||||
| static inline struct rcu_head * | ||||
| rcu_segcblist_dequeue(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 	int i; | ||||
| 	struct rcu_head *rhp; | ||||
| 
 | ||||
| 	local_irq_save(flags); | ||||
| 	if (!rcu_segcblist_ready_cbs(rsclp)) { | ||||
| 		local_irq_restore(flags); | ||||
| 		return NULL; | ||||
| 	} | ||||
| 	rhp = rsclp->head; | ||||
| 	BUG_ON(!rhp); | ||||
| 	rsclp->head = rhp->next; | ||||
| 	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) { | ||||
| 		if (rsclp->tails[i] != &rhp->next) | ||||
| 			break; | ||||
| 		rsclp->tails[i] = &rsclp->head; | ||||
| 	} | ||||
| 	smp_mb(); /* Dequeue before decrement for rcu_barrier(). */ | ||||
| 	WRITE_ONCE(rsclp->len, rsclp->len - 1); | ||||
| 	local_irq_restore(flags); | ||||
| 	return rhp; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Account for the fact that a previously dequeued callback turned out | ||||
|  * to be marked as lazy. | ||||
|  */ | ||||
| static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	local_irq_save(flags); | ||||
| 	rsclp->len_lazy--; | ||||
| 	local_irq_restore(flags); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Return a pointer to the first callback in the specified rcu_segcblist | ||||
|  * structure.  This is useful for diagnostics. | ||||
|  */ | ||||
| static inline struct rcu_head * | ||||
| rcu_segcblist_first_cb(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	if (rcu_segcblist_is_enabled(rsclp)) | ||||
| 		return rsclp->head; | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Return a pointer to the first pending callback in the specified | ||||
|  * rcu_segcblist structure.  This is useful just after posting a given | ||||
|  * callback -- if that callback is the first pending callback, then | ||||
|  * you cannot rely on someone else having already started up the required | ||||
|  * grace period. | ||||
|  */ | ||||
| static inline struct rcu_head * | ||||
| rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	if (rcu_segcblist_is_enabled(rsclp)) | ||||
| 		return *rsclp->tails[RCU_DONE_TAIL]; | ||||
| 	return NULL; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Does the specified rcu_segcblist structure contain callbacks that | ||||
|  * have not yet been processed beyond having been posted, that is, | ||||
|  * does it contain callbacks in its last segment? | ||||
|  */ | ||||
| static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	return rcu_segcblist_is_enabled(rsclp) && | ||||
| 	       !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Enqueue the specified callback onto the specified rcu_segcblist | ||||
|  * structure, updating accounting as needed.  Note that the ->len | ||||
|  * field may be accessed locklessly, hence the WRITE_ONCE(). | ||||
|  * The ->len field is used by rcu_barrier() and friends to determine | ||||
|  * if it must post a callback on this structure, and it is OK | ||||
|  * for rcu_barrier() to sometimes post callbacks needlessly, but | ||||
|  * absolutely not OK for it to ever miss posting a callback. | ||||
|  */ | ||||
| static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, | ||||
| 					 struct rcu_head *rhp, bool lazy) | ||||
| { | ||||
| 	WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */ | ||||
| 	if (lazy) | ||||
| 		rsclp->len_lazy++; | ||||
| 	smp_mb(); /* Ensure counts are updated before callback is enqueued. */ | ||||
| 	rhp->next = NULL; | ||||
| 	*rsclp->tails[RCU_NEXT_TAIL] = rhp; | ||||
| 	rsclp->tails[RCU_NEXT_TAIL] = &rhp->next; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Entrain the specified callback onto the specified rcu_segcblist at | ||||
|  * the end of the last non-empty segment.  If the entire rcu_segcblist | ||||
|  * is empty, make no change, but return false. | ||||
|  * | ||||
|  * This is intended for use by rcu_barrier()-like primitives, -not- | ||||
|  * for normal grace-period use.  IMPORTANT:  The callback you enqueue | ||||
|  * will wait for all prior callbacks, NOT necessarily for a grace | ||||
|  * period.  You have been warned. | ||||
|  */ | ||||
| static inline bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, | ||||
| 					 struct rcu_head *rhp, bool lazy) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	if (rcu_segcblist_n_cbs(rsclp) == 0) | ||||
| 		return false; | ||||
| 	WRITE_ONCE(rsclp->len, rsclp->len + 1); | ||||
| 	if (lazy) | ||||
| 		rsclp->len_lazy++; | ||||
| 	smp_mb(); /* Ensure counts are updated before callback is entrained. */ | ||||
| 	rhp->next = NULL; | ||||
| 	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--) | ||||
| 		if (rsclp->tails[i] != rsclp->tails[i - 1]) | ||||
| 			break; | ||||
| 	*rsclp->tails[i] = rhp; | ||||
| 	for (; i <= RCU_NEXT_TAIL; i++) | ||||
| 		rsclp->tails[i] = &rhp->next; | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Extract only the counts from the specified rcu_segcblist structure, | ||||
|  * and place them in the specified rcu_cblist structure.  This function | ||||
|  * supports both callback orphaning and invocation, hence the separation | ||||
|  * of counts and callbacks.  (Callbacks ready for invocation must be | ||||
|  * orphaned and adopted separately from pending callbacks, but counts | ||||
|  * apply to all callbacks.  Locking must be used to make sure that | ||||
|  * both orphaned-callbacks lists are consistent.) | ||||
|  */ | ||||
| static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, | ||||
| 					       struct rcu_cblist *rclp) | ||||
| { | ||||
| 	rclp->len_lazy += rsclp->len_lazy; | ||||
| 	rclp->len += rsclp->len; | ||||
| 	rsclp->len_lazy = 0; | ||||
| 	WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */ | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Extract only those callbacks ready to be invoked from the specified | ||||
|  * rcu_segcblist structure and place them in the specified rcu_cblist | ||||
|  * structure. | ||||
|  */ | ||||
| static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, | ||||
| 						  struct rcu_cblist *rclp) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	if (!rcu_segcblist_ready_cbs(rsclp)) | ||||
| 		return; /* Nothing to do. */ | ||||
| 	*rclp->tail = rsclp->head; | ||||
| 	rsclp->head = *rsclp->tails[RCU_DONE_TAIL]; | ||||
| 	*rsclp->tails[RCU_DONE_TAIL] = NULL; | ||||
| 	rclp->tail = rsclp->tails[RCU_DONE_TAIL]; | ||||
| 	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--) | ||||
| 		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL]) | ||||
| 			rsclp->tails[i] = &rsclp->head; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Extract only those callbacks still pending (not yet ready to be | ||||
|  * invoked) from the specified rcu_segcblist structure and place them in | ||||
|  * the specified rcu_cblist structure.  Note that this loses information | ||||
|  * about any callbacks that might have been partway done waiting for | ||||
|  * their grace period.  Too bad!  They will have to start over. | ||||
|  */ | ||||
| static inline void | ||||
| rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, | ||||
| 			       struct rcu_cblist *rclp) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	if (!rcu_segcblist_pend_cbs(rsclp)) | ||||
| 		return; /* Nothing to do. */ | ||||
| 	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL]; | ||||
| 	rclp->tail = rsclp->tails[RCU_NEXT_TAIL]; | ||||
| 	*rsclp->tails[RCU_DONE_TAIL] = NULL; | ||||
| 	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) | ||||
| 		rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL]; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Move the entire contents of the specified rcu_segcblist structure, | ||||
|  * counts, callbacks, and all, to the specified rcu_cblist structure. | ||||
|  * @@@ Why do we need this???  Moving early-boot CBs to NOCB lists? | ||||
|  * @@@ Memory barrier needed?  (Not if only used at boot time...) | ||||
|  */ | ||||
| static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp, | ||||
| 					     struct rcu_cblist *rclp) | ||||
| { | ||||
| 	rcu_segcblist_extract_done_cbs(rsclp, rclp); | ||||
| 	rcu_segcblist_extract_pend_cbs(rsclp, rclp); | ||||
| 	rcu_segcblist_extract_count(rsclp, rclp); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Insert counts from the specified rcu_cblist structure in the | ||||
|  * specified rcu_segcblist structure. | ||||
|  */ | ||||
| static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, | ||||
| 					      struct rcu_cblist *rclp) | ||||
| { | ||||
| 	rsclp->len_lazy += rclp->len_lazy; | ||||
| 	/* ->len sampled locklessly. */ | ||||
| 	WRITE_ONCE(rsclp->len, rsclp->len + rclp->len); | ||||
| 	rclp->len_lazy = 0; | ||||
| 	rclp->len = 0; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Move callbacks from the specified rcu_cblist to the beginning of the | ||||
|  * done-callbacks segment of the specified rcu_segcblist. | ||||
|  */ | ||||
| static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, | ||||
| 						 struct rcu_cblist *rclp) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	if (!rclp->head) | ||||
| 		return; /* No callbacks to move. */ | ||||
| 	*rclp->tail = rsclp->head; | ||||
| 	rsclp->head = rclp->head; | ||||
| 	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) | ||||
| 		if (&rsclp->head == rsclp->tails[i]) | ||||
| 			rsclp->tails[i] = rclp->tail; | ||||
| 		else | ||||
| 			break; | ||||
| 	rclp->head = NULL; | ||||
| 	rclp->tail = &rclp->head; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Move callbacks from the specified rcu_cblist to the end of the | ||||
|  * new-callbacks segment of the specified rcu_segcblist. | ||||
|  */ | ||||
| static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp, | ||||
| 						 struct rcu_cblist *rclp) | ||||
| { | ||||
| 	if (!rclp->head) | ||||
| 		return; /* Nothing to do. */ | ||||
| 	*rsclp->tails[RCU_NEXT_TAIL] = rclp->head; | ||||
| 	rsclp->tails[RCU_NEXT_TAIL] = rclp->tail; | ||||
| 	rclp->head = NULL; | ||||
| 	rclp->tail = &rclp->head; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Advance the callbacks in the specified rcu_segcblist structure based | ||||
|  * on the current value passed in for the grace-period counter. | ||||
|  */ | ||||
| static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp, | ||||
| 					 unsigned long seq) | ||||
| { | ||||
| 	int i, j; | ||||
| 
 | ||||
| 	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); | ||||
| 	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) | ||||
| 		return; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Find all callbacks whose ->gp_seq numbers indicate that they | ||||
| 	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment. | ||||
| 	 */ | ||||
| 	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { | ||||
| 		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i])) | ||||
| 			break; | ||||
| 		rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i]; | ||||
| 	} | ||||
| 
 | ||||
| 	/* If no callbacks moved, nothing more need be done. */ | ||||
| 	if (i == RCU_WAIT_TAIL) | ||||
| 		return; | ||||
| 
 | ||||
| 	/* Clean up tail pointers that might have been misordered above. */ | ||||
| 	for (j = RCU_WAIT_TAIL; j < i; j++) | ||||
| 		rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL]; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Callbacks moved, so clean up the misordered ->tails[] pointers | ||||
| 	 * that now point into the middle of the list of ready-to-invoke | ||||
| 	 * callbacks.  The overall effect is to copy down the later pointers | ||||
| 	 * into the gap that was created by the now-ready segments. | ||||
| 	 */ | ||||
| 	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { | ||||
| 		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL]) | ||||
| 			break;  /* No more callbacks. */ | ||||
| 		rsclp->tails[j] = rsclp->tails[i]; | ||||
| 		rsclp->gp_seq[j] = rsclp->gp_seq[i]; | ||||
| 	} | ||||
| } | ||||
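/*
 * Worked example with a hypothetical starting state: [WAIT] holds
 * callbacks with ->gp_seq = 8, [NEXT_READY] holds callbacks with
 * ->gp_seq = 12, and seq = 8 is passed in.  The first loop advances
 * tails[RCU_DONE_TAIL] over the WAIT segment (8 is not less than 8)
 * and stops at NEXT_READY (8 < 12).  The copy-down loop then slides
 * the NEXT_READY callbacks (and their ->gp_seq of 12) into WAIT,
 * leaving the old WAIT callbacks ready to invoke in DONE and the
 * NEXT_READY segment empty.
 */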
| 
 | ||||
| /*
 | ||||
|  * "Accelerate" callbacks based on more-accurate grace-period information. | ||||
|  * The reason for this is that RCU does not synchronize the beginnings and | ||||
|  * ends of grace periods, and that callbacks are posted locally.  This in | ||||
|  * turn means that the callbacks must be labelled conservatively early | ||||
|  * on, as getting exact information would degrade both performance and | ||||
|  * scalability.  When more accurate grace-period information becomes | ||||
|  * available, previously posted callbacks can be "accelerated", marking | ||||
|  * them to complete at the end of the earlier grace period. | ||||
|  * | ||||
|  * This function operates on an rcu_segcblist structure, and also the | ||||
|  * grace-period sequence number seq at which new callbacks would become | ||||
|  * ready to invoke.  Returns true if there are callbacks that won't be | ||||
|  * ready to invoke until seq, false otherwise. | ||||
|  */ | ||||
| static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, | ||||
| 					    unsigned long seq) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); | ||||
| 	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) | ||||
| 		return false; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Find the segment preceding the oldest segment of callbacks | ||||
| 	 * whose ->gp_seq[] completion is at or after that passed in via | ||||
| 	 * "seq", skipping any empty segments.  This oldest segment, along | ||||
| 	 * with any later segments, can be merged in with any newly arrived | ||||
| 	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq" | ||||
| 	 * as their ->gp_seq[] grace-period completion sequence number. | ||||
| 	 */ | ||||
| 	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--) | ||||
| 		if (rsclp->tails[i] != rsclp->tails[i - 1] && | ||||
| 		    ULONG_CMP_LT(rsclp->gp_seq[i], seq)) | ||||
| 			break; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If all the segments contain callbacks that correspond to | ||||
| 	 * earlier grace-period sequence numbers than "seq", leave. | ||||
| 	 * Assuming that the rcu_segcblist structure has enough | ||||
| 	 * segments in its arrays, this can only happen if some of | ||||
| 	 * the non-done segments contain callbacks that really are | ||||
| 	 * ready to invoke.  This situation will get straightened | ||||
| 	 * out by the next call to rcu_segcblist_advance(). | ||||
| 	 * | ||||
| 	 * Also advance to the oldest segment of callbacks whose | ||||
| 	 * ->gp_seq[] completion is at or after that passed in via "seq", | ||||
| 	 * skipping any empty segments. | ||||
| 	 */ | ||||
| 	if (++i >= RCU_NEXT_TAIL) | ||||
| 		return false; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Merge all later callbacks, including newly arrived callbacks, | ||||
| 	 * into the segment located by the for-loop above.  Assign "seq" | ||||
| 	 * as the ->gp_seq[] value in order to correctly handle the case | ||||
| 	 * where there were no pending callbacks in the rcu_segcblist | ||||
| 	 * structure other than in the RCU_NEXT_TAIL segment. | ||||
| 	 */ | ||||
| 	for (; i < RCU_NEXT_TAIL; i++) { | ||||
| 		rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL]; | ||||
| 		rsclp->gp_seq[i] = seq; | ||||
| 	} | ||||
| 	return true; | ||||
| } | ||||
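/*
 * Worked example with a hypothetical starting state: [WAIT] waits for
 * gp 8, [NEXT_READY] waits for gp 12, [NEXT] holds newly arrived
 * callbacks, and seq = 12 is passed in.  The first loop stops at WAIT
 * (its ->gp_seq of 8 precedes 12), so the merge loop folds the RCU_NEXT
 * callbacks into NEXT_READY, which already has ->gp_seq = 12, and the
 * function returns true: the caller now knows that a grace period
 * completing at 12 is needed.
 */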
| 
 | ||||
| /*
 | ||||
|  * Scan the specified rcu_segcblist structure for callbacks that need | ||||
|  * a grace period later than the one specified by "seq".  We don't look | ||||
|  * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't | ||||
|  * have a grace-period sequence number. | ||||
|  */ | ||||
| static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, | ||||
| 						  unsigned long seq) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) | ||||
| 		if (rsclp->tails[i - 1] != rsclp->tails[i] && | ||||
| 		    ULONG_CMP_LT(seq, rsclp->gp_seq[i])) | ||||
| 			return true; | ||||
| 	return false; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Interim function to return rcu_segcblist head pointer.  Longer term, the | ||||
|  * rcu_segcblist will be used more pervasively, removing the need for this | ||||
|  * function. | ||||
|  */ | ||||
| static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	return rsclp->head; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Interim function to return rcu_segcblist tail pointer.  Longer term, the | ||||
|  * rcu_segcblist will be used more pervasively, removing the need for this | ||||
|  * function. | ||||
|  */ | ||||
| static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp) | ||||
| { | ||||
| 	WARN_ON_ONCE(rcu_segcblist_empty(rsclp)); | ||||
| 	return rsclp->tails[RCU_NEXT_TAIL]; | ||||
| } | ||||
| 
 | ||||
| #endif /* __KERNEL_RCU_SEGCBLIST_H */ | ||||
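To see how these pieces fit together, here is a minimal caller sketch;
gp_seq_current() and invoke_cb() are hypothetical stand-ins for the RCU
core's grace-period counter and callback invoker:

	static struct rcu_segcblist my_cbs = RCU_SEGCBLIST_INITIALIZER(my_cbs);

	static void my_cbs_demo(struct rcu_head *rhp)
	{
		struct rcu_head *next;

		/* Newly posted callbacks land in the RCU_NEXT_TAIL segment. */
		rcu_segcblist_enqueue(&my_cbs, rhp, false);

		/* Tag them with the grace period they must wait for. */
		rcu_segcblist_accelerate(&my_cbs, gp_seq_current() + 1);

		/* Later, once the grace-period counter has advanced... */
		rcu_segcblist_advance(&my_cbs, gp_seq_current());

		/* ...ready callbacks reach RCU_DONE_TAIL and can be invoked. */
		while ((next = rcu_segcblist_dequeue(&my_cbs)) != NULL)
			invoke_cb(next);
	}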
|  | @ -509,7 +509,8 @@ static inline void hlist_add_tail_rcu(struct hlist_node *n, | |||
| { | ||||
| 	struct hlist_node *i, *last = NULL; | ||||
| 
 | ||||
| 	for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i)) | ||||
| 	/* Note: write side code, so rcu accessors are not needed. */ | ||||
| 	for (i = h->first; i; i = i->next) | ||||
| 		last = i; | ||||
| 
 | ||||
| 	if (last) { | ||||
|  |  | |||
|  | @ -363,15 +363,20 @@ static inline void rcu_init_nohz(void) | |||
| #ifdef CONFIG_TASKS_RCU | ||||
| #define TASKS_RCU(x) x | ||||
| extern struct srcu_struct tasks_rcu_exit_srcu; | ||||
| #define rcu_note_voluntary_context_switch(t) \ | ||||
| #define rcu_note_voluntary_context_switch_lite(t) \ | ||||
| 	do { \ | ||||
| 		rcu_all_qs(); \ | ||||
| 		if (READ_ONCE((t)->rcu_tasks_holdout)) \ | ||||
| 			WRITE_ONCE((t)->rcu_tasks_holdout, false); \ | ||||
| 	} while (0) | ||||
| #define rcu_note_voluntary_context_switch(t) \ | ||||
| 	do { \ | ||||
| 		rcu_all_qs(); \ | ||||
| 		rcu_note_voluntary_context_switch_lite(t); \ | ||||
| 	} while (0) | ||||
| #else /* #ifdef CONFIG_TASKS_RCU */ | ||||
| #define TASKS_RCU(x) do { } while (0) | ||||
| #define rcu_note_voluntary_context_switch(t)	rcu_all_qs() | ||||
| #define rcu_note_voluntary_context_switch_lite(t)	do { } while (0) | ||||
| #define rcu_note_voluntary_context_switch(t)		rcu_all_qs() | ||||
| #endif /* #else #ifdef CONFIG_TASKS_RCU */ | ||||
| 
 | ||||
| /**
 | ||||
|  | @ -1127,11 +1132,11 @@ do { \ | |||
|  * if the UNLOCK and LOCK are executed by the same CPU or if the | ||||
|  * UNLOCK and LOCK operate on the same lock variable. | ||||
|  */ | ||||
| #ifdef CONFIG_PPC | ||||
| #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE | ||||
| #define smp_mb__after_unlock_lock()	smp_mb()  /* Full ordering for lock. */ | ||||
| #else /* #ifdef CONFIG_PPC */ | ||||
| #else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ | ||||
| #define smp_mb__after_unlock_lock()	do { } while (0) | ||||
| #endif /* #else #ifdef CONFIG_PPC */ | ||||
| #endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ | ||||
| 
 | ||||
| 
 | ||||
| #endif /* __LINUX_RCUPDATE_H */ | ||||
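The barrier is placed immediately after the second lock acquisition, so
that an unlock followed by a lock acts as a full memory barrier even on
architectures that select ARCH_WEAK_RELEASE_ACQUIRE.  A minimal sketch
with hypothetical locks and variables:

	spin_lock(&lock_a);
	WRITE_ONCE(x, 1);
	spin_unlock(&lock_a);

	spin_lock(&lock_b);
	smp_mb__after_unlock_lock(); /* Makes the unlock+lock a full barrier. */
	WRITE_ONCE(y, 1);
	spin_unlock(&lock_b);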
|  |  | |||
|  | @ -33,6 +33,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) | |||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| static inline bool rcu_eqs_special_set(int cpu) | ||||
| { | ||||
| 	return false;  /* Never flag non-existent other CPUs! */ | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long get_state_synchronize_rcu(void) | ||||
| { | ||||
| 	return 0; | ||||
|  | @ -87,10 +92,11 @@ static inline void kfree_call_rcu(struct rcu_head *head, | |||
| 	call_rcu(head, func); | ||||
| } | ||||
| 
 | ||||
| static inline void rcu_note_context_switch(void) | ||||
| { | ||||
| 	rcu_sched_qs(); | ||||
| } | ||||
| #define rcu_note_context_switch(preempt) \ | ||||
| 	do { \ | ||||
| 		rcu_sched_qs(); \ | ||||
| 		rcu_note_voluntary_context_switch_lite(current); \ | ||||
| 	} while (0) | ||||
| 
 | ||||
| /*
 | ||||
|  * Take advantage of the fact that there is only one CPU, which | ||||
|  | @ -212,14 +218,14 @@ static inline void exit_rcu(void) | |||
| { | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) | ||||
| extern int rcu_scheduler_active __read_mostly; | ||||
| void rcu_scheduler_starting(void); | ||||
| #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| #else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ | ||||
| static inline void rcu_scheduler_starting(void) | ||||
| { | ||||
| } | ||||
| #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| #endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ | ||||
| 
 | ||||
| #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) | ||||
| 
 | ||||
|  | @ -237,6 +243,10 @@ static inline bool rcu_is_watching(void) | |||
| 
 | ||||
| #endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ | ||||
| 
 | ||||
| static inline void rcu_request_urgent_qs_task(struct task_struct *t) | ||||
| { | ||||
| } | ||||
| 
 | ||||
| static inline void rcu_all_qs(void) | ||||
| { | ||||
| 	barrier(); /* Avoid RCU read-side critical sections leaking across. */ | ||||
|  |  | |||
|  | @ -30,7 +30,7 @@ | |||
| #ifndef __LINUX_RCUTREE_H | ||||
| #define __LINUX_RCUTREE_H | ||||
| 
 | ||||
| void rcu_note_context_switch(void); | ||||
| void rcu_note_context_switch(bool preempt); | ||||
| int rcu_needs_cpu(u64 basem, u64 *nextevt); | ||||
| void rcu_cpu_stall_reset(void); | ||||
| 
 | ||||
|  | @ -41,7 +41,7 @@ void rcu_cpu_stall_reset(void); | |||
|  */ | ||||
| static inline void rcu_virt_note_context_switch(int cpu) | ||||
| { | ||||
| 	rcu_note_context_switch(); | ||||
| 	rcu_note_context_switch(false); | ||||
| } | ||||
| 
 | ||||
| void synchronize_rcu_bh(void); | ||||
|  | @ -108,6 +108,7 @@ void rcu_scheduler_starting(void); | |||
| extern int rcu_scheduler_active __read_mostly; | ||||
| 
 | ||||
| bool rcu_is_watching(void); | ||||
| void rcu_request_urgent_qs_task(struct task_struct *t); | ||||
| 
 | ||||
| void rcu_all_qs(void); | ||||
| 
 | ||||
|  |  | |||
|  | @ -28,7 +28,7 @@ | |||
| #define SLAB_STORE_USER		0x00010000UL	/* DEBUG: Store the last owner for bug hunting */ | ||||
| #define SLAB_PANIC		0x00040000UL	/* Panic if kmem_cache_create() fails */ | ||||
| /*
 | ||||
|  * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS! | ||||
|  * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! | ||||
|  * | ||||
|  * This delays freeing the SLAB page by a grace period, it does _NOT_ | ||||
|  * delay object freeing. This means that if you do kmem_cache_free() | ||||
|  | @ -61,8 +61,10 @@ | |||
|  * | ||||
|  * rcu_read_lock before reading the address, then rcu_read_unlock after | ||||
|  * taking the spinlock within the structure expected at that address. | ||||
|  * | ||||
|  * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. | ||||
|  */ | ||||
| #define SLAB_DESTROY_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */ | ||||
| #define SLAB_TYPESAFE_BY_RCU	0x00080000UL	/* Defer freeing slabs to RCU */ | ||||
| #define SLAB_MEM_SPREAD		0x00100000UL	/* Spread some memory over cpuset */ | ||||
| #define SLAB_TRACE		0x00200000UL	/* Trace allocations and frees */ | ||||
| 
 | ||||
|  |  | |||
|  | @ -22,7 +22,7 @@ | |||
|  *	   Lai Jiangshan <laijs@cn.fujitsu.com> | ||||
|  * | ||||
|  * For detailed explanation of Read-Copy Update mechanism see - | ||||
|  * 		Documentation/RCU/ *.txt | ||||
|  *		Documentation/RCU/ *.txt | ||||
|  * | ||||
|  */ | ||||
| 
 | ||||
|  | @ -32,35 +32,9 @@ | |||
| #include <linux/mutex.h> | ||||
| #include <linux/rcupdate.h> | ||||
| #include <linux/workqueue.h> | ||||
| #include <linux/rcu_segcblist.h> | ||||
| 
 | ||||
| struct srcu_array { | ||||
| 	unsigned long lock_count[2]; | ||||
| 	unsigned long unlock_count[2]; | ||||
| }; | ||||
| 
 | ||||
| struct rcu_batch { | ||||
| 	struct rcu_head *head, **tail; | ||||
| }; | ||||
| 
 | ||||
| #define RCU_BATCH_INIT(name) { NULL, &(name.head) } | ||||
| 
 | ||||
| struct srcu_struct { | ||||
| 	unsigned long completed; | ||||
| 	struct srcu_array __percpu *per_cpu_ref; | ||||
| 	spinlock_t queue_lock; /* protect ->batch_queue, ->running */ | ||||
| 	bool running; | ||||
| 	/* callbacks just queued */ | ||||
| 	struct rcu_batch batch_queue; | ||||
| 	/* callbacks try to do the first check_zero */ | ||||
| 	struct rcu_batch batch_check0; | ||||
| 	/* callbacks done with the first check_zero and the flip */ | ||||
| 	struct rcu_batch batch_check1; | ||||
| 	struct rcu_batch batch_done; | ||||
| 	struct delayed_work work; | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| 	struct lockdep_map dep_map; | ||||
| #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| }; | ||||
| struct srcu_struct; | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| 
 | ||||
|  | @ -82,46 +56,15 @@ int init_srcu_struct(struct srcu_struct *sp); | |||
| #define __SRCU_DEP_MAP_INIT(srcu_name) | ||||
| #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| 
 | ||||
| void process_srcu(struct work_struct *work); | ||||
| 
 | ||||
| #define __SRCU_STRUCT_INIT(name)					\ | ||||
| 	{								\ | ||||
| 		.completed = -300,					\ | ||||
| 		.per_cpu_ref = &name##_srcu_array,			\ | ||||
| 		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\ | ||||
| 		.running = false,					\ | ||||
| 		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\ | ||||
| 		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\ | ||||
| 		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\ | ||||
| 		.batch_done = RCU_BATCH_INIT(name.batch_done),		\ | ||||
| 		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ | ||||
| 		__SRCU_DEP_MAP_INIT(name)				\ | ||||
| 	} | ||||
| 
 | ||||
| /*
 | ||||
|  * Define and initialize a srcu struct at build time. | ||||
|  * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. | ||||
|  * | ||||
|  * Note that although DEFINE_STATIC_SRCU() hides the name from other | ||||
|  * files, the per-CPU variable rules nevertheless require that the | ||||
|  * chosen name be globally unique.  These rules also prohibit use of | ||||
|  * DEFINE_STATIC_SRCU() within a function.  If these rules are too | ||||
|  * restrictive, declare the srcu_struct manually.  For example, in | ||||
|  * each file: | ||||
|  * | ||||
|  *	static struct srcu_struct my_srcu; | ||||
|  * | ||||
|  * Then, before the first use of each my_srcu, manually initialize it: | ||||
|  * | ||||
|  *	init_srcu_struct(&my_srcu); | ||||
|  * | ||||
|  * See include/linux/percpu-defs.h for the rules on per-CPU variables. | ||||
|  */ | ||||
| #define __DEFINE_SRCU(name, is_static)					\ | ||||
| 	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ | ||||
| 	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | ||||
| #define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */) | ||||
| #define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static) | ||||
| #ifdef CONFIG_TINY_SRCU | ||||
| #include <linux/srcutiny.h> | ||||
| #elif defined(CONFIG_TREE_SRCU) | ||||
| #include <linux/srcutree.h> | ||||
| #elif defined(CONFIG_CLASSIC_SRCU) | ||||
| #include <linux/srcuclassic.h> | ||||
| #else | ||||
| #error "Unknown SRCU implementation specified to kernel configuration" | ||||
| #endif | ||||
| 
 | ||||
| /**
 | ||||
|  * call_srcu() - Queue a callback for invocation after an SRCU grace period | ||||
|  | @ -147,9 +90,6 @@ void cleanup_srcu_struct(struct srcu_struct *sp); | |||
| int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); | ||||
| void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); | ||||
| void synchronize_srcu(struct srcu_struct *sp); | ||||
| void synchronize_srcu_expedited(struct srcu_struct *sp); | ||||
| unsigned long srcu_batches_completed(struct srcu_struct *sp); | ||||
| void srcu_barrier(struct srcu_struct *sp); | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| 
 | ||||
|  |  | |||
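Whichever implementation the #ifdef chain above pulls in, the
caller-visible API stays the same.  A minimal usage sketch with
hypothetical names (my_srcu, struct foo, shared_ptr):

	struct foo {
		int val;
	};

	static struct foo __rcu *shared_ptr;
	DEFINE_STATIC_SRCU(my_srcu);

	static int reader(void)
	{
		struct foo *p;
		int idx, val = -1;

		idx = srcu_read_lock(&my_srcu);
		p = srcu_dereference(shared_ptr, &my_srcu);
		if (p)
			val = p->val;
		srcu_read_unlock(&my_srcu, idx);
		return val;
	}

	static void updater(struct foo *newp)	/* Caller serializes updates. */
	{
		struct foo *oldp = rcu_dereference_protected(shared_ptr, 1);

		rcu_assign_pointer(shared_ptr, newp);
		synchronize_srcu(&my_srcu);	/* Wait for pre-existing SRCU readers. */
		kfree(oldp);
	}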
							
								
								
									
101  include/linux/srcuclassic.h  (new file)
							|  | @ -0,0 +1,101 @@ | |||
| /*
 | ||||
|  * Sleepable Read-Copy Update mechanism for mutual exclusion, | ||||
|  *	classic v4.11 variant. | ||||
|  * | ||||
|  * This program is free software; you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation; either version 2 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * This program is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with this program; if not, you can access it online at | ||||
|  * http://www.gnu.org/licenses/gpl-2.0.html.
 | ||||
|  * | ||||
|  * Copyright (C) IBM Corporation, 2017 | ||||
|  * | ||||
|  * Author: Paul McKenney <paulmck@us.ibm.com> | ||||
|  */ | ||||
| 
 | ||||
| #ifndef _LINUX_SRCU_CLASSIC_H | ||||
| #define _LINUX_SRCU_CLASSIC_H | ||||
| 
 | ||||
| struct srcu_array { | ||||
| 	unsigned long lock_count[2]; | ||||
| 	unsigned long unlock_count[2]; | ||||
| }; | ||||
| 
 | ||||
| struct rcu_batch { | ||||
| 	struct rcu_head *head, **tail; | ||||
| }; | ||||
| 
 | ||||
| #define RCU_BATCH_INIT(name) { NULL, &(name.head) } | ||||
| 
 | ||||
| struct srcu_struct { | ||||
| 	unsigned long completed; | ||||
| 	struct srcu_array __percpu *per_cpu_ref; | ||||
| 	spinlock_t queue_lock; /* protect ->batch_queue, ->running */ | ||||
| 	bool running; | ||||
| 	/* callbacks just queued */ | ||||
| 	struct rcu_batch batch_queue; | ||||
| 	/* callbacks try to do the first check_zero */ | ||||
| 	struct rcu_batch batch_check0; | ||||
| 	/* callbacks done with the first check_zero and the flip */ | ||||
| 	struct rcu_batch batch_check1; | ||||
| 	struct rcu_batch batch_done; | ||||
| 	struct delayed_work work; | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| 	struct lockdep_map dep_map; | ||||
| #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| }; | ||||
| 
 | ||||
| void process_srcu(struct work_struct *work); | ||||
| 
 | ||||
| #define __SRCU_STRUCT_INIT(name)					\ | ||||
| 	{								\ | ||||
| 		.completed = -300,					\ | ||||
| 		.per_cpu_ref = &name##_srcu_array,			\ | ||||
| 		.queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock),	\ | ||||
| 		.running = false,					\ | ||||
| 		.batch_queue = RCU_BATCH_INIT(name.batch_queue),	\ | ||||
| 		.batch_check0 = RCU_BATCH_INIT(name.batch_check0),	\ | ||||
| 		.batch_check1 = RCU_BATCH_INIT(name.batch_check1),	\ | ||||
| 		.batch_done = RCU_BATCH_INIT(name.batch_done),		\ | ||||
| 		.work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ | ||||
| 		__SRCU_DEP_MAP_INIT(name)				\ | ||||
| 	} | ||||
| 
 | ||||
| /*
 | ||||
|  * Define and initialize a srcu struct at build time. | ||||
|  * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. | ||||
|  * | ||||
|  * Note that although DEFINE_STATIC_SRCU() hides the name from other | ||||
|  * files, the per-CPU variable rules nevertheless require that the | ||||
|  * chosen name be globally unique.  These rules also prohibit use of | ||||
|  * DEFINE_STATIC_SRCU() within a function.  If these rules are too | ||||
|  * restrictive, declare the srcu_struct manually.  For example, in | ||||
|  * each file: | ||||
|  * | ||||
|  *	static struct srcu_struct my_srcu; | ||||
|  * | ||||
|  * Then, before the first use of each my_srcu, manually initialize it: | ||||
|  * | ||||
|  *	init_srcu_struct(&my_srcu); | ||||
|  * | ||||
|  * See include/linux/percpu-defs.h for the rules on per-CPU variables. | ||||
|  */ | ||||
| #define __DEFINE_SRCU(name, is_static)					\ | ||||
| 	static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ | ||||
| 	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | ||||
| #define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */) | ||||
| #define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static) | ||||
| 
 | ||||
| void synchronize_srcu_expedited(struct srcu_struct *sp); | ||||
| void srcu_barrier(struct srcu_struct *sp); | ||||
| unsigned long srcu_batches_completed(struct srcu_struct *sp); | ||||
| 
 | ||||
| #endif | ||||
							
								
								
									
81  include/linux/srcutiny.h  (new file)
							|  | @ -0,0 +1,81 @@ | |||
| /*
 | ||||
|  * Sleepable Read-Copy Update mechanism for mutual exclusion, | ||||
|  *	tiny variant. | ||||
|  * | ||||
|  * This program is free software; you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation; either version 2 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * This program is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with this program; if not, you can access it online at | ||||
|  * http://www.gnu.org/licenses/gpl-2.0.html.
 | ||||
|  * | ||||
|  * Copyright (C) IBM Corporation, 2017 | ||||
|  * | ||||
|  * Author: Paul McKenney <paulmck@us.ibm.com> | ||||
|  */ | ||||
| 
 | ||||
| #ifndef _LINUX_SRCU_TINY_H | ||||
| #define _LINUX_SRCU_TINY_H | ||||
| 
 | ||||
| #include <linux/swait.h> | ||||
| 
 | ||||
| struct srcu_struct { | ||||
| 	int srcu_lock_nesting[2];	/* srcu_read_lock() nesting depth. */ | ||||
| 	struct swait_queue_head srcu_wq; | ||||
| 					/* Last srcu_read_unlock() wakes GP. */ | ||||
| 	unsigned long srcu_gp_seq;	/* GP seq # for callback tagging. */ | ||||
| 	struct rcu_segcblist srcu_cblist; | ||||
| 					/* Pending SRCU callbacks. */ | ||||
| 	int srcu_idx;			/* Current reader array element. */ | ||||
| 	bool srcu_gp_running;		/* GP workqueue running? */ | ||||
| 	bool srcu_gp_waiting;		/* GP waiting for readers? */ | ||||
| 	struct work_struct srcu_work;	/* For driving grace periods. */ | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| 	struct lockdep_map dep_map; | ||||
| #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| }; | ||||
| 
 | ||||
| void srcu_drive_gp(struct work_struct *wp); | ||||
| 
 | ||||
| #define __SRCU_STRUCT_INIT(name)					\ | ||||
| {									\ | ||||
| 	.srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq),	\ | ||||
| 	.srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),	\ | ||||
| 	.srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp),	\ | ||||
| 	__SRCU_DEP_MAP_INIT(name)					\ | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * This odd _STATIC_ arrangement is needed for API compatibility with | ||||
|  * Tree SRCU, which needs some per-CPU data. | ||||
|  */ | ||||
| #define DEFINE_SRCU(name) \ | ||||
| 	struct srcu_struct name = __SRCU_STRUCT_INIT(name) | ||||
| #define DEFINE_STATIC_SRCU(name) \ | ||||
| 	static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | ||||
| 
 | ||||
| void synchronize_srcu(struct srcu_struct *sp); | ||||
| 
 | ||||
| static inline void synchronize_srcu_expedited(struct srcu_struct *sp) | ||||
| { | ||||
| 	synchronize_srcu(sp); | ||||
| } | ||||
| 
 | ||||
| static inline void srcu_barrier(struct srcu_struct *sp) | ||||
| { | ||||
| 	synchronize_srcu(sp); | ||||
| } | ||||
| 
 | ||||
| static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) | ||||
| { | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| #endif | ||||
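For orientation, here is a minimal usage sketch of the SRCU API that both Tiny SRCU and Tree SRCU sit behind. It is illustration only: the srcu_read_lock()/srcu_read_unlock() wrappers come from include/linux/srcu.h (not part of this hunk), and struct cfg, global_cfg, and cfg_replace() are hypothetical names, not symbols from this patch. Update-side serialization (for example, a mutex around cfg_replace()) is assumed and omitted.

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/srcu.h>

	struct cfg {
		int threshold;
	};

	static struct cfg __rcu *global_cfg;
	DEFINE_STATIC_SRCU(cfg_srcu);

	int cfg_read_threshold(void)
	{
		int idx, val;

		idx = srcu_read_lock(&cfg_srcu);	/* SRCU readers may sleep in here */
		val = srcu_dereference(global_cfg, &cfg_srcu)->threshold;
		srcu_read_unlock(&cfg_srcu, idx);
		return val;
	}

	void cfg_replace(struct cfg *new_cfg)
	{
		struct cfg *old_cfg = rcu_dereference_protected(global_cfg, 1);

		rcu_assign_pointer(global_cfg, new_cfg);
		synchronize_srcu(&cfg_srcu);	/* wait for readers that might see old_cfg */
		kfree(old_cfg);
	}

The property this series preserves is that the read-side critical section may block, which plain RCU forbids.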
include/linux/srcutree.h (new file, 139 lines)
							|  | @ -0,0 +1,139 @@ | |||
| /*
 | ||||
|  * Sleepable Read-Copy Update mechanism for mutual exclusion, | ||||
|  *	tree variant. | ||||
|  * | ||||
|  * This program is free software; you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation; either version 2 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * This program is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with this program; if not, you can access it online at | ||||
|  * http://www.gnu.org/licenses/gpl-2.0.html.
 | ||||
|  * | ||||
|  * Copyright (C) IBM Corporation, 2017 | ||||
|  * | ||||
|  * Author: Paul McKenney <paulmck@us.ibm.com> | ||||
|  */ | ||||
| 
 | ||||
| #ifndef _LINUX_SRCU_TREE_H | ||||
| #define _LINUX_SRCU_TREE_H | ||||
| 
 | ||||
| #include <linux/rcu_node_tree.h> | ||||
| #include <linux/completion.h> | ||||
| 
 | ||||
| struct srcu_node; | ||||
| struct srcu_struct; | ||||
| 
 | ||||
| /*
 | ||||
|  * Per-CPU structure feeding into leaf srcu_node, similar in function | ||||
|  * to rcu_data. | ||||
|  */ | ||||
| struct srcu_data { | ||||
| 	/* Read-side state. */ | ||||
| 	unsigned long srcu_lock_count[2];	/* Locks per CPU. */ | ||||
| 	unsigned long srcu_unlock_count[2];	/* Unlocks per CPU. */ | ||||
| 
 | ||||
| 	/* Update-side state. */ | ||||
| 	spinlock_t lock ____cacheline_internodealigned_in_smp; | ||||
| 	struct rcu_segcblist srcu_cblist;	/* List of callbacks.*/ | ||||
| 	unsigned long srcu_gp_seq_needed;	/* Furthest future GP needed. */ | ||||
| 	bool srcu_cblist_invoking;		/* Invoking these CBs? */ | ||||
| 	struct delayed_work work;		/* Context for CB invoking. */ | ||||
| 	struct rcu_head srcu_barrier_head;	/* For srcu_barrier() use. */ | ||||
| 	struct srcu_node *mynode;		/* Leaf srcu_node. */ | ||||
| 	int cpu; | ||||
| 	struct srcu_struct *sp; | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Node in SRCU combining tree, similar in function to rcu_node. | ||||
|  */ | ||||
| struct srcu_node { | ||||
| 	spinlock_t lock; | ||||
| 	unsigned long srcu_have_cbs[4];		/* GP seq for children */ | ||||
| 						/*  having CBs, but only */ | ||||
| 						/*  if greater than ->srcu_gp_seq. */ | ||||
| 	struct srcu_node *srcu_parent;		/* Next up in tree. */ | ||||
| 	int grplo;				/* Least CPU for node. */ | ||||
| 	int grphi;				/* Biggest CPU for node. */ | ||||
| }; | ||||
| 
 | ||||
| /*
 | ||||
|  * Per-SRCU-domain structure, similar in function to rcu_state. | ||||
|  */ | ||||
| struct srcu_struct { | ||||
| 	struct srcu_node node[NUM_RCU_NODES];	/* Combining tree. */ | ||||
| 	struct srcu_node *level[RCU_NUM_LVLS + 1]; | ||||
| 						/* First node at each level. */ | ||||
| 	struct mutex srcu_cb_mutex;		/* Serialize CB preparation. */ | ||||
| 	spinlock_t gp_lock;			/* protect ->srcu_cblist */ | ||||
| 	struct mutex srcu_gp_mutex;		/* Serialize GP work. */ | ||||
| 	unsigned int srcu_idx;			/* Current rdr array element. */ | ||||
| 	unsigned long srcu_gp_seq;		/* Grace-period seq #. */ | ||||
| 	unsigned long srcu_gp_seq_needed;	/* Latest gp_seq needed. */ | ||||
| 	atomic_t srcu_exp_cnt;			/* # ongoing expedited GPs. */ | ||||
| 	struct srcu_data __percpu *sda;		/* Per-CPU srcu_data array. */ | ||||
| 	unsigned long srcu_barrier_seq;		/* srcu_barrier seq #. */ | ||||
| 	struct mutex srcu_barrier_mutex;	/* Serialize barrier ops. */ | ||||
| 	struct completion srcu_barrier_completion; | ||||
| 						/* Awaken barrier rq at end. */ | ||||
| 	atomic_t srcu_barrier_cpu_cnt;		/* # CPUs not yet posting a */ | ||||
| 						/*  callback for the barrier */ | ||||
| 						/*  operation. */ | ||||
| 	struct delayed_work work; | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| 	struct lockdep_map dep_map; | ||||
| #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| }; | ||||
| 
 | ||||
| /* Values for state variable (bottom bits of ->srcu_gp_seq). */ | ||||
| #define SRCU_STATE_IDLE		0 | ||||
| #define SRCU_STATE_SCAN1	1 | ||||
| #define SRCU_STATE_SCAN2	2 | ||||
| 
 | ||||
| void process_srcu(struct work_struct *work); | ||||
| 
 | ||||
| #define __SRCU_STRUCT_INIT(name)					\ | ||||
| 	{								\ | ||||
| 		.sda = &name##_srcu_data,				\ | ||||
| 		.gp_lock = __SPIN_LOCK_UNLOCKED(name.gp_lock),		\ | ||||
| 		.srcu_gp_seq_needed = 0 - 1,				\ | ||||
| 		__SRCU_DEP_MAP_INIT(name)				\ | ||||
| 	} | ||||
| 
 | ||||
| /*
 | ||||
|  * Define and initialize a srcu struct at build time. | ||||
|  * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. | ||||
|  * | ||||
|  * Note that although DEFINE_STATIC_SRCU() hides the name from other | ||||
|  * files, the per-CPU variable rules nevertheless require that the | ||||
|  * chosen name be globally unique.  These rules also prohibit use of | ||||
|  * DEFINE_STATIC_SRCU() within a function.  If these rules are too | ||||
|  * restrictive, declare the srcu_struct manually.  For example, in | ||||
|  * each file: | ||||
|  * | ||||
|  *	static struct srcu_struct my_srcu; | ||||
|  * | ||||
|  * Then, before the first use of each my_srcu, manually initialize it: | ||||
|  * | ||||
|  *	init_srcu_struct(&my_srcu); | ||||
|  * | ||||
|  * See include/linux/percpu-defs.h for the rules on per-CPU variables. | ||||
|  */ | ||||
| #define __DEFINE_SRCU(name, is_static)					\ | ||||
| 	static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\ | ||||
| 	is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) | ||||
| #define DEFINE_SRCU(name)		__DEFINE_SRCU(name, /* not static */) | ||||
| #define DEFINE_STATIC_SRCU(name)	__DEFINE_SRCU(name, static) | ||||
| 
 | ||||
| void synchronize_srcu_expedited(struct srcu_struct *sp); | ||||
| void srcu_barrier(struct srcu_struct *sp); | ||||
| unsigned long srcu_batches_completed(struct srcu_struct *sp); | ||||
| 
 | ||||
| #endif | ||||
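The SRCU_STATE_* values above are stored in the low-order bits of ->srcu_gp_seq, with the remaining bits counting completed grace periods; the helpers that pack and unpack this layout (RCU_SEQ_CTR_SHIFT, rcu_seq_state(), rcu_seq_ctr()) are added to kernel/rcu/rcu.h later in this patch. A stand-alone user-space sketch of the bit layout, for illustration only:

	#include <stdio.h>

	#define RCU_SEQ_CTR_SHIFT	2	/* mirrors kernel/rcu/rcu.h */
	#define RCU_SEQ_STATE_MASK	((1UL << RCU_SEQ_CTR_SHIFT) - 1)

	int main(void)
	{
		unsigned long gp_seq = 10;	/* binary 1010 */

		/* Prints "ctr=2 state=2": two grace periods have completed and
		 * a third is in its second scan phase (SRCU_STATE_SCAN2). */
		printf("ctr=%lu state=%lu\n",
		       gp_seq >> RCU_SEQ_CTR_SHIFT,
		       gp_seq & RCU_SEQ_STATE_MASK);
		return 0;
	}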
|  | @ -209,7 +209,7 @@ struct ustat { | |||
|  * naturally due to ABI requirements, but some architectures (like CRIS) have | ||||
|  * weird ABI and we need to ask it explicitly. | ||||
|  * | ||||
|  * The alignment is required to guarantee that bits 0 and 1 of @next will be | ||||
|  * The alignment is required to guarantee that bit 0 of @next will be | ||||
|  * clear under normal conditions -- as long as we use call_rcu(), | ||||
|  * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. | ||||
|  * | ||||
|  |  | |||
|  | @ -995,7 +995,7 @@ struct smc_hashinfo; | |||
| struct module; | ||||
| 
 | ||||
| /*
 | ||||
|  * caches using SLAB_DESTROY_BY_RCU should let .next pointer from nulls nodes | ||||
|  * caches using SLAB_TYPESAFE_BY_RCU should let .next pointer from nulls nodes | ||||
|  * un-modified. Special care is taken when initializing object to zero. | ||||
|  */ | ||||
| static inline void sk_prot_clear_nulls(struct sock *sk, int size) | ||||
|  |  | |||
init/Kconfig (39 lines changed)
							|  | @ -526,6 +526,35 @@ config SRCU | |||
| 	  permits arbitrary sleeping or blocking within RCU read-side critical | ||||
| 	  sections. | ||||
| 
 | ||||
| config CLASSIC_SRCU | ||||
| 	bool "Use v4.11 classic SRCU implementation" | ||||
| 	default n | ||||
| 	depends on RCU_EXPERT && SRCU | ||||
| 	help | ||||
| 	  This option selects the traditional well-tested classic SRCU | ||||
| 	  implementation from v4.11, as might be desired for enterprise | ||||
| 	  Linux distributions.  Without this option, the shiny new | ||||
| 	  Tiny SRCU and Tree SRCU implementations are used instead. | ||||
| 	  At some point, it is hoped that Tiny SRCU and Tree SRCU | ||||
| 	  will accumulate enough test time and confidence to allow | ||||
| 	  Classic SRCU to be dropped entirely. | ||||
| 
 | ||||
| 	  Say Y if you need a rock-solid SRCU. | ||||
| 
 | ||||
| 	  Say N if you would like to help test Tree SRCU. | ||||
| 
 | ||||
| config TINY_SRCU | ||||
| 	bool | ||||
| 	default y if TINY_RCU && !CLASSIC_SRCU | ||||
| 	help | ||||
| 	  This option selects the single-CPU non-preemptible version of SRCU. | ||||
| 
 | ||||
| config TREE_SRCU | ||||
| 	bool | ||||
| 	default y if !TINY_RCU && !CLASSIC_SRCU | ||||
| 	help | ||||
| 	  This option selects the full-fledged version of SRCU. | ||||
| 
 | ||||
| config TASKS_RCU | ||||
| 	bool | ||||
| 	default n | ||||
|  | @ -612,11 +641,17 @@ config RCU_FANOUT_LEAF | |||
| 	  initialization.  These systems tend to run CPU-bound, and thus | ||||
| 	  are not helped by synchronized interrupts, and thus tend to | ||||
| 	  skew them, which reduces lock contention enough that large | ||||
| 	  leaf-level fanouts work well. | ||||
| 	  leaf-level fanouts work well.  That said, setting leaf-level | ||||
| 	  fanout to a large number will likely cause problematic | ||||
| 	  lock contention on the leaf-level rcu_node structures unless | ||||
| 	  you boot with the skew_tick kernel parameter. | ||||
| 
 | ||||
| 	  Select a specific number if testing RCU itself. | ||||
| 
 | ||||
| 	  Select the maximum permissible value for large systems. | ||||
| 	  Select the maximum permissible value for large systems, but | ||||
| 	  please understand that you may also need to set the skew_tick | ||||
| 	  kernel boot parameter to avoid contention on the rcu_node | ||||
| 	  structure's locks. | ||||
| 
 | ||||
| 	  Take the default if unsure. | ||||
| 
 | ||||
|  |  | |||
|  | @ -1313,7 +1313,7 @@ void __cleanup_sighand(struct sighand_struct *sighand) | |||
| 	if (atomic_dec_and_test(&sighand->count)) { | ||||
| 		signalfd_cleanup(sighand); | ||||
| 		/*
 | ||||
| 		 * sighand_cachep is SLAB_DESTROY_BY_RCU so we can free it | ||||
| 		 * sighand_cachep is SLAB_TYPESAFE_BY_RCU so we can free it | ||||
| 		 * without an RCU grace period, see __lock_task_sighand(). | ||||
| 		 */ | ||||
| 		kmem_cache_free(sighand_cachep, sighand); | ||||
|  | @ -2144,7 +2144,7 @@ void __init proc_caches_init(void) | |||
| { | ||||
| 	sighand_cachep = kmem_cache_create("sighand_cache", | ||||
| 			sizeof(struct sighand_struct), 0, | ||||
| 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU| | ||||
| 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU| | ||||
| 			SLAB_NOTRACK|SLAB_ACCOUNT, sighand_ctor); | ||||
| 	signal_cachep = kmem_cache_create("signal_cache", | ||||
| 			sizeof(struct signal_struct), 0, | ||||
|  |  | |||
|  | @ -1144,10 +1144,10 @@ print_circular_bug_header(struct lock_list *entry, unsigned int depth, | |||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("======================================================\n"); | ||||
| 	printk("[ INFO: possible circular locking dependency detected ]\n"); | ||||
| 	pr_warn("======================================================\n"); | ||||
| 	pr_warn("WARNING: possible circular locking dependency detected\n"); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("-------------------------------------------------------\n"); | ||||
| 	pr_warn("------------------------------------------------------\n"); | ||||
| 	printk("%s/%d is trying to acquire lock:\n", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	print_lock(check_src); | ||||
|  | @ -1482,11 +1482,11 @@ print_bad_irq_dependency(struct task_struct *curr, | |||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("======================================================\n"); | ||||
| 	printk("[ INFO: %s-safe -> %s-unsafe lock order detected ]\n", | ||||
| 	pr_warn("=====================================================\n"); | ||||
| 	pr_warn("WARNING: %s-safe -> %s-unsafe lock order detected\n", | ||||
| 		irqclass, irqclass); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("------------------------------------------------------\n"); | ||||
| 	pr_warn("-----------------------------------------------------\n"); | ||||
| 	printk("%s/%d [HC%u[%lu]:SC%u[%lu]:HE%u:SE%u] is trying to acquire:\n", | ||||
| 		curr->comm, task_pid_nr(curr), | ||||
| 		curr->hardirq_context, hardirq_count() >> HARDIRQ_SHIFT, | ||||
|  | @ -1711,10 +1711,10 @@ print_deadlock_bug(struct task_struct *curr, struct held_lock *prev, | |||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("=============================================\n"); | ||||
| 	printk("[ INFO: possible recursive locking detected ]\n"); | ||||
| 	pr_warn("============================================\n"); | ||||
| 	pr_warn("WARNING: possible recursive locking detected\n"); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("---------------------------------------------\n"); | ||||
| 	pr_warn("--------------------------------------------\n"); | ||||
| 	printk("%s/%d is trying to acquire lock:\n", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	print_lock(next); | ||||
|  | @ -2061,10 +2061,10 @@ static void print_collision(struct task_struct *curr, | |||
| 			struct lock_chain *chain) | ||||
| { | ||||
| 	printk("\n"); | ||||
| 	printk("======================\n"); | ||||
| 	printk("[chain_key collision ]\n"); | ||||
| 	pr_warn("============================\n"); | ||||
| 	pr_warn("WARNING: chain_key collision\n"); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("----------------------\n"); | ||||
| 	pr_warn("----------------------------\n"); | ||||
| 	printk("%s/%d: ", current->comm, task_pid_nr(current)); | ||||
| 	printk("Hash chain already cached but the contents don't match!\n"); | ||||
| 
 | ||||
|  | @ -2360,10 +2360,10 @@ print_usage_bug(struct task_struct *curr, struct held_lock *this, | |||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("=================================\n"); | ||||
| 	printk("[ INFO: inconsistent lock state ]\n"); | ||||
| 	pr_warn("================================\n"); | ||||
| 	pr_warn("WARNING: inconsistent lock state\n"); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("---------------------------------\n"); | ||||
| 	pr_warn("--------------------------------\n"); | ||||
| 
 | ||||
| 	printk("inconsistent {%s} -> {%s} usage.\n", | ||||
| 		usage_str[prev_bit], usage_str[new_bit]); | ||||
|  | @ -2425,10 +2425,10 @@ print_irq_inversion_bug(struct task_struct *curr, | |||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("=========================================================\n"); | ||||
| 	printk("[ INFO: possible irq lock inversion dependency detected ]\n"); | ||||
| 	pr_warn("========================================================\n"); | ||||
| 	pr_warn("WARNING: possible irq lock inversion dependency detected\n"); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("---------------------------------------------------------\n"); | ||||
| 	pr_warn("--------------------------------------------------------\n"); | ||||
| 	printk("%s/%d just changed the state of lock:\n", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	print_lock(this); | ||||
|  | @ -3170,10 +3170,10 @@ print_lock_nested_lock_not_held(struct task_struct *curr, | |||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("==================================\n"); | ||||
| 	printk("[ BUG: Nested lock was not taken ]\n"); | ||||
| 	pr_warn("==================================\n"); | ||||
| 	pr_warn("WARNING: Nested lock was not taken\n"); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("----------------------------------\n"); | ||||
| 	pr_warn("----------------------------------\n"); | ||||
| 
 | ||||
| 	printk("%s/%d is trying to lock:\n", curr->comm, task_pid_nr(curr)); | ||||
| 	print_lock(hlock); | ||||
|  | @ -3383,10 +3383,10 @@ print_unlock_imbalance_bug(struct task_struct *curr, struct lockdep_map *lock, | |||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("=====================================\n"); | ||||
| 	printk("[ BUG: bad unlock balance detected! ]\n"); | ||||
| 	pr_warn("=====================================\n"); | ||||
| 	pr_warn("WARNING: bad unlock balance detected!\n"); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("-------------------------------------\n"); | ||||
| 	pr_warn("-------------------------------------\n"); | ||||
| 	printk("%s/%d is trying to release lock (", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	print_lockdep_cache(lock); | ||||
|  | @ -3880,10 +3880,10 @@ print_lock_contention_bug(struct task_struct *curr, struct lockdep_map *lock, | |||
| 		return 0; | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("=================================\n"); | ||||
| 	printk("[ BUG: bad contention detected! ]\n"); | ||||
| 	pr_warn("=================================\n"); | ||||
| 	pr_warn("WARNING: bad contention detected!\n"); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("---------------------------------\n"); | ||||
| 	pr_warn("---------------------------------\n"); | ||||
| 	printk("%s/%d is trying to contend lock (", | ||||
| 		curr->comm, task_pid_nr(curr)); | ||||
| 	print_lockdep_cache(lock); | ||||
|  | @ -4244,10 +4244,10 @@ print_freed_lock_bug(struct task_struct *curr, const void *mem_from, | |||
| 		return; | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("=========================\n"); | ||||
| 	printk("[ BUG: held lock freed! ]\n"); | ||||
| 	pr_warn("=========================\n"); | ||||
| 	pr_warn("WARNING: held lock freed!\n"); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("-------------------------\n"); | ||||
| 	pr_warn("-------------------------\n"); | ||||
| 	printk("%s/%d is freeing memory %p-%p, with a lock still held there!\n", | ||||
| 		curr->comm, task_pid_nr(curr), mem_from, mem_to-1); | ||||
| 	print_lock(hlock); | ||||
|  | @ -4302,11 +4302,11 @@ static void print_held_locks_bug(void) | |||
| 		return; | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("=====================================\n"); | ||||
| 	printk("[ BUG: %s/%d still has locks held! ]\n", | ||||
| 	pr_warn("====================================\n"); | ||||
| 	pr_warn("WARNING: %s/%d still has locks held!\n", | ||||
| 	       current->comm, task_pid_nr(current)); | ||||
| 	print_kernel_ident(); | ||||
| 	printk("-------------------------------------\n"); | ||||
| 	pr_warn("------------------------------------\n"); | ||||
| 	lockdep_print_held_locks(current); | ||||
| 	printk("\nstack backtrace:\n"); | ||||
| 	dump_stack(); | ||||
|  | @ -4371,7 +4371,7 @@ retry: | |||
| 	} while_each_thread(g, p); | ||||
| 
 | ||||
| 	printk("\n"); | ||||
| 	printk("=============================================\n\n"); | ||||
| 	pr_warn("=============================================\n\n"); | ||||
| 
 | ||||
| 	if (unlock) | ||||
| 		read_unlock(&tasklist_lock); | ||||
|  | @ -4401,10 +4401,10 @@ asmlinkage __visible void lockdep_sys_exit(void) | |||
| 		if (!debug_locks_off()) | ||||
| 			return; | ||||
| 		printk("\n"); | ||||
| 		printk("================================================\n"); | ||||
| 		printk("[ BUG: lock held when returning to user space! ]\n"); | ||||
| 		pr_warn("================================================\n"); | ||||
| 		pr_warn("WARNING: lock held when returning to user space!\n"); | ||||
| 		print_kernel_ident(); | ||||
| 		printk("------------------------------------------------\n"); | ||||
| 		pr_warn("------------------------------------------------\n"); | ||||
| 		printk("%s/%d is leaving the kernel with locks still held!\n", | ||||
| 				curr->comm, curr->pid); | ||||
| 		lockdep_print_held_locks(curr); | ||||
|  | @ -4421,13 +4421,13 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) | |||
| #endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */ | ||||
| 	/* Note: the following can be executed concurrently, so be careful. */ | ||||
| 	printk("\n"); | ||||
| 	pr_err("===============================\n"); | ||||
| 	pr_err("[ ERR: suspicious RCU usage.  ]\n"); | ||||
| 	pr_warn("=============================\n"); | ||||
| 	pr_warn("WARNING: suspicious RCU usage\n"); | ||||
| 	print_kernel_ident(); | ||||
| 	pr_err("-------------------------------\n"); | ||||
| 	pr_err("%s:%d %s!\n", file, line, s); | ||||
| 	pr_err("\nother info that might help us debug this:\n\n"); | ||||
| 	pr_err("\n%srcu_scheduler_active = %d, debug_locks = %d\n", | ||||
| 	pr_warn("-----------------------------\n"); | ||||
| 	printk("%s:%d %s!\n", file, line, s); | ||||
| 	printk("\nother info that might help us debug this:\n\n"); | ||||
| 	printk("\n%srcu_scheduler_active = %d, debug_locks = %d\n", | ||||
| 	       !rcu_lockdep_current_cpu_online() | ||||
| 			? "RCU used illegally from offline CPU!\n" | ||||
| 			: !rcu_is_watching() | ||||
|  |  | |||
|  | @ -102,10 +102,11 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) | |||
| 		return; | ||||
| 	} | ||||
| 
 | ||||
| 	printk("\n============================================\n"); | ||||
| 	printk(  "[ BUG: circular locking deadlock detected! ]\n"); | ||||
| 	printk("%s\n", print_tainted()); | ||||
| 	printk(  "--------------------------------------------\n"); | ||||
| 	pr_warn("\n"); | ||||
| 	pr_warn("============================================\n"); | ||||
| 	pr_warn("WARNING: circular locking deadlock detected!\n"); | ||||
| 	pr_warn("%s\n", print_tainted()); | ||||
| 	pr_warn("--------------------------------------------\n"); | ||||
| 	printk("%s/%d is deadlocking current task %s/%d\n\n", | ||||
| 	       task->comm, task_pid_nr(task), | ||||
| 	       current->comm, task_pid_nr(current)); | ||||
|  |  | |||
|  | @ -3,7 +3,9 @@ | |||
| KCOV_INSTRUMENT := n | ||||
| 
 | ||||
| obj-y += update.o sync.o | ||||
| obj-$(CONFIG_SRCU) += srcu.o | ||||
| obj-$(CONFIG_CLASSIC_SRCU) += srcu.o | ||||
| obj-$(CONFIG_TREE_SRCU) += srcutree.o | ||||
| obj-$(CONFIG_TINY_SRCU) += srcutiny.o | ||||
| obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o | ||||
| obj-$(CONFIG_RCU_PERF_TEST) += rcuperf.o | ||||
| obj-$(CONFIG_TREE_RCU) += tree.o | ||||
|  |  | |||
kernel/rcu/rcu.h (153 lines changed)
							|  | @ -56,6 +56,83 @@ | |||
| #define DYNTICK_TASK_EXIT_IDLE	   (DYNTICK_TASK_NEST_VALUE + \ | ||||
| 				    DYNTICK_TASK_FLAG) | ||||
| 
 | ||||
| 
 | ||||
| /*
 | ||||
|  * Grace-period counter management. | ||||
|  */ | ||||
| 
 | ||||
| #define RCU_SEQ_CTR_SHIFT	2 | ||||
| #define RCU_SEQ_STATE_MASK	((1 << RCU_SEQ_CTR_SHIFT) - 1) | ||||
| 
 | ||||
| /*
 | ||||
|  * Return the counter portion of a sequence number previously returned | ||||
|  * by rcu_seq_snap() or rcu_seq_current(). | ||||
|  */ | ||||
| static inline unsigned long rcu_seq_ctr(unsigned long s) | ||||
| { | ||||
| 	return s >> RCU_SEQ_CTR_SHIFT; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Return the state portion of a sequence number previously returned | ||||
|  * by rcu_seq_snap() or rcu_seq_current(). | ||||
|  */ | ||||
| static inline int rcu_seq_state(unsigned long s) | ||||
| { | ||||
| 	return s & RCU_SEQ_STATE_MASK; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Set the state portion of the pointed-to sequence number. | ||||
|  * The caller is responsible for preventing conflicting updates. | ||||
|  */ | ||||
| static inline void rcu_seq_set_state(unsigned long *sp, int newstate) | ||||
| { | ||||
| 	WARN_ON_ONCE(newstate & ~RCU_SEQ_STATE_MASK); | ||||
| 	WRITE_ONCE(*sp, (*sp & ~RCU_SEQ_STATE_MASK) + newstate); | ||||
| } | ||||
| 
 | ||||
| /* Adjust sequence number for start of update-side operation. */ | ||||
| static inline void rcu_seq_start(unsigned long *sp) | ||||
| { | ||||
| 	WRITE_ONCE(*sp, *sp + 1); | ||||
| 	smp_mb(); /* Ensure update-side operation after counter increment. */ | ||||
| 	WARN_ON_ONCE(rcu_seq_state(*sp) != 1); | ||||
| } | ||||
| 
 | ||||
| /* Adjust sequence number for end of update-side operation. */ | ||||
| static inline void rcu_seq_end(unsigned long *sp) | ||||
| { | ||||
| 	smp_mb(); /* Ensure update-side operation before counter increment. */ | ||||
| 	WARN_ON_ONCE(!rcu_seq_state(*sp)); | ||||
| 	WRITE_ONCE(*sp, (*sp | RCU_SEQ_STATE_MASK) + 1); | ||||
| } | ||||
| 
 | ||||
| /* Take a snapshot of the update side's sequence number. */ | ||||
| static inline unsigned long rcu_seq_snap(unsigned long *sp) | ||||
| { | ||||
| 	unsigned long s; | ||||
| 
 | ||||
| 	s = (READ_ONCE(*sp) + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK; | ||||
| 	smp_mb(); /* Above access must not bleed into critical section. */ | ||||
| 	return s; | ||||
| } | ||||
| 
 | ||||
| /* Return the current value of the update side's sequence number, no ordering. */ | ||||
| static inline unsigned long rcu_seq_current(unsigned long *sp) | ||||
| { | ||||
| 	return READ_ONCE(*sp); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Given a snapshot from rcu_seq_snap(), determine whether or not a | ||||
|  * full update-side operation has occurred. | ||||
|  */ | ||||
| static inline bool rcu_seq_done(unsigned long *sp, unsigned long s) | ||||
| { | ||||
| 	return ULONG_CMP_GE(READ_ONCE(*sp), s); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * debug_rcu_head_queue()/debug_rcu_head_unqueue() are used internally | ||||
|  * by call_rcu() and rcu callback execution, and are therefore not part of the | ||||
|  | @ -109,12 +186,12 @@ static inline bool __rcu_reclaim(const char *rn, struct rcu_head *head) | |||
| 
 | ||||
| 	rcu_lock_acquire(&rcu_callback_map); | ||||
| 	if (__is_kfree_rcu_offset(offset)) { | ||||
| 		RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset)); | ||||
| 		RCU_TRACE(trace_rcu_invoke_kfree_callback(rn, head, offset);) | ||||
| 		kfree((void *)head - offset); | ||||
| 		rcu_lock_release(&rcu_callback_map); | ||||
| 		return true; | ||||
| 	} else { | ||||
| 		RCU_TRACE(trace_rcu_invoke_callback(rn, head)); | ||||
| 		RCU_TRACE(trace_rcu_invoke_callback(rn, head);) | ||||
| 		head->func(head); | ||||
| 		rcu_lock_release(&rcu_callback_map); | ||||
| 		return false; | ||||
|  | @ -144,4 +221,76 @@ void rcu_test_sync_prims(void); | |||
|  */ | ||||
| extern void resched_cpu(int cpu); | ||||
| 
 | ||||
| #if defined(SRCU) || !defined(TINY_RCU) | ||||
| 
 | ||||
| #include <linux/rcu_node_tree.h> | ||||
| 
 | ||||
| extern int rcu_num_lvls; | ||||
| extern int num_rcu_lvl[]; | ||||
| extern int rcu_num_nodes; | ||||
| static bool rcu_fanout_exact; | ||||
| static int rcu_fanout_leaf; | ||||
| 
 | ||||
| /*
 | ||||
|  * Compute the per-level fanout, either using the exact fanout specified | ||||
|  * or balancing the tree, depending on the rcu_fanout_exact boot parameter. | ||||
|  */ | ||||
| static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt) | ||||
| { | ||||
| 	int i; | ||||
| 
 | ||||
| 	if (rcu_fanout_exact) { | ||||
| 		levelspread[rcu_num_lvls - 1] = rcu_fanout_leaf; | ||||
| 		for (i = rcu_num_lvls - 2; i >= 0; i--) | ||||
| 			levelspread[i] = RCU_FANOUT; | ||||
| 	} else { | ||||
| 		int ccur; | ||||
| 		int cprv; | ||||
| 
 | ||||
| 		cprv = nr_cpu_ids; | ||||
| 		for (i = rcu_num_lvls - 1; i >= 0; i--) { | ||||
| 			ccur = levelcnt[i]; | ||||
| 			levelspread[i] = (cprv + ccur - 1) / ccur; | ||||
| 			cprv = ccur; | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Do a full breadth-first scan of the rcu_node structures for the | ||||
|  * specified rcu_state structure. | ||||
|  */ | ||||
| #define rcu_for_each_node_breadth_first(rsp, rnp) \ | ||||
| 	for ((rnp) = &(rsp)->node[0]; \ | ||||
| 	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) | ||||
| 
 | ||||
| /*
 | ||||
|  * Do a breadth-first scan of the non-leaf rcu_node structures for the | ||||
|  * specified rcu_state structure.  Note that if there is a singleton | ||||
|  * rcu_node tree with but one rcu_node structure, this loop is a no-op. | ||||
|  */ | ||||
| #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ | ||||
| 	for ((rnp) = &(rsp)->node[0]; \ | ||||
| 	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) | ||||
| 
 | ||||
| /*
 | ||||
|  * Scan the leaves of the rcu_node hierarchy for the specified rcu_state | ||||
|  * structure.  Note that if there is a singleton rcu_node tree with but | ||||
|  * one rcu_node structure, this loop -will- visit the rcu_node structure. | ||||
|  * It is still a leaf node, even if it is also the root node. | ||||
|  */ | ||||
| #define rcu_for_each_leaf_node(rsp, rnp) \ | ||||
| 	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ | ||||
| 	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) | ||||
| 
 | ||||
| /*
 | ||||
|  * Iterate over all possible CPUs in a leaf RCU node. | ||||
|  */ | ||||
| #define for_each_leaf_node_possible_cpu(rnp, cpu) \ | ||||
| 	for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \ | ||||
| 	     cpu <= rnp->grphi; \ | ||||
| 	     cpu = cpumask_next((cpu), cpu_possible_mask)) | ||||
| 
 | ||||
| #endif /* #if defined(SRCU) || !defined(TINY_RCU) */ | ||||
| 
 | ||||
| #endif /* __LINUX_RCU_H */ | ||||
|  |  | |||
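To make the rcu_seq_snap()/rcu_seq_done() arithmetic above concrete, here is a user-space re-derivation (memory barriers dropped, illustration only). A snapshot rounds the current sequence number up to the value it will reach once a grace period beginning after the snapshot has completed; rcu_seq_done() is then a wrap-tolerant "current >= snapshot" test, for which the (long) cast below is a stand-in for ULONG_CMP_GE().

	#include <stdio.h>

	#define RCU_SEQ_CTR_SHIFT	2
	#define RCU_SEQ_STATE_MASK	((1UL << RCU_SEQ_CTR_SHIFT) - 1)

	/* Same rounding as rcu_seq_snap(), without the smp_mb(). */
	static unsigned long seq_snap(unsigned long s)
	{
		return (s + 2 * RCU_SEQ_STATE_MASK + 1) & ~RCU_SEQ_STATE_MASK;
	}

	/* Stand-in for ULONG_CMP_GE(cur, s). */
	static int seq_done(unsigned long cur, unsigned long s)
	{
		return (long)(cur - s) >= 0;
	}

	int main(void)
	{
		/* Idle (8 = ctr 2, state 0): the next GP starts at 9, ends at 12. */
		printf("snap(8) = %lu\n", seq_snap(8));			/* 12 */

		/* GP in flight (9 = ctr 2, state 1): that GP ends at 12, so the
		 * snapshot must wait for the following one, which ends at 16. */
		printf("snap(9) = %lu\n", seq_snap(9));			/* 16 */

		printf("done at 12? %d\n", seq_done(12, seq_snap(9)));	/* 0 */
		printf("done at 16? %d\n", seq_done(16, seq_snap(9)));	/* 1 */
		return 0;
	}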
|  | @ -559,19 +559,34 @@ static void srcu_torture_barrier(void) | |||
| 
 | ||||
| static void srcu_torture_stats(void) | ||||
| { | ||||
| 	int cpu; | ||||
| 	int idx = srcu_ctlp->completed & 0x1; | ||||
| 	int __maybe_unused cpu; | ||||
| 	int idx; | ||||
| 
 | ||||
| 	pr_alert("%s%s per-CPU(idx=%d):", | ||||
| #if defined(CONFIG_TREE_SRCU) || defined(CONFIG_CLASSIC_SRCU) | ||||
| #ifdef CONFIG_TREE_SRCU | ||||
| 	idx = srcu_ctlp->srcu_idx & 0x1; | ||||
| #else /* #ifdef CONFIG_TREE_SRCU */ | ||||
| 	idx = srcu_ctlp->completed & 0x1; | ||||
| #endif /* #else #ifdef CONFIG_TREE_SRCU */ | ||||
| 	pr_alert("%s%s Tree SRCU per-CPU(idx=%d):", | ||||
| 		 torture_type, TORTURE_FLAG, idx); | ||||
| 	for_each_possible_cpu(cpu) { | ||||
| 		unsigned long l0, l1; | ||||
| 		unsigned long u0, u1; | ||||
| 		long c0, c1; | ||||
| 		struct srcu_array *counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu); | ||||
| #ifdef CONFIG_TREE_SRCU | ||||
| 		struct srcu_data *counts; | ||||
| 
 | ||||
| 		counts = per_cpu_ptr(srcu_ctlp->sda, cpu); | ||||
| 		u0 = counts->srcu_unlock_count[!idx]; | ||||
| 		u1 = counts->srcu_unlock_count[idx]; | ||||
| #else /* #ifdef CONFIG_TREE_SRCU */ | ||||
| 		struct srcu_array *counts; | ||||
| 
 | ||||
| 		counts = per_cpu_ptr(srcu_ctlp->per_cpu_ref, cpu); | ||||
| 		u0 = counts->unlock_count[!idx]; | ||||
| 		u1 = counts->unlock_count[idx]; | ||||
| #endif /* #else #ifdef CONFIG_TREE_SRCU */ | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * Make sure that a lock is always counted if the corresponding | ||||
|  | @ -579,14 +594,26 @@ static void srcu_torture_stats(void) | |||
| 		 */ | ||||
| 		smp_rmb(); | ||||
| 
 | ||||
| #ifdef CONFIG_TREE_SRCU | ||||
| 		l0 = counts->srcu_lock_count[!idx]; | ||||
| 		l1 = counts->srcu_lock_count[idx]; | ||||
| #else /* #ifdef CONFIG_TREE_SRCU */ | ||||
| 		l0 = counts->lock_count[!idx]; | ||||
| 		l1 = counts->lock_count[idx]; | ||||
| #endif /* #else #ifdef CONFIG_TREE_SRCU */ | ||||
| 
 | ||||
| 		c0 = l0 - u0; | ||||
| 		c1 = l1 - u1; | ||||
| 		pr_cont(" %d(%ld,%ld)", cpu, c0, c1); | ||||
| 	} | ||||
| 	pr_cont("\n"); | ||||
| #elif defined(CONFIG_TINY_SRCU) | ||||
| 	idx = READ_ONCE(srcu_ctlp->srcu_idx) & 0x1; | ||||
| 	pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%d,%d)\n", | ||||
| 		 torture_type, TORTURE_FLAG, idx, | ||||
| 		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[!idx]), | ||||
| 		 READ_ONCE(srcu_ctlp->srcu_lock_nesting[idx])); | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| static void srcu_torture_synchronize_expedited(void) | ||||
|  |  | |||
|  | @ -22,7 +22,7 @@ | |||
|  *	   Lai Jiangshan <laijs@cn.fujitsu.com> | ||||
|  * | ||||
|  * For detailed explanation of Read-Copy Update mechanism see - | ||||
|  * 		Documentation/RCU/ *.txt | ||||
|  *		Documentation/RCU/ *.txt | ||||
|  * | ||||
|  */ | ||||
| 
 | ||||
|  | @ -243,8 +243,14 @@ static bool srcu_readers_active(struct srcu_struct *sp) | |||
|  * cleanup_srcu_struct - deconstruct a sleep-RCU structure | ||||
|  * @sp: structure to clean up. | ||||
|  * | ||||
|  * Must invoke this after you are finished using a given srcu_struct that | ||||
|  * was initialized via init_srcu_struct(), else you leak memory. | ||||
|  * Must invoke this only after you are finished using a given srcu_struct | ||||
|  * that was initialized via init_srcu_struct().  This code does some | ||||
|  * probabilistic checking, spotting late uses of srcu_read_lock(), | ||||
|  * synchronize_srcu(), synchronize_srcu_expedited(), and call_srcu(). | ||||
|  * If any such late uses are detected, the per-CPU memory associated with | ||||
|  * the srcu_struct is simply leaked and WARN_ON() is invoked.  If the | ||||
|  * caller frees the srcu_struct itself, a use-after-free crash will likely | ||||
|  * ensue, but at least there will be a warning printed. | ||||
|  */ | ||||
| void cleanup_srcu_struct(struct srcu_struct *sp) | ||||
| { | ||||
|  |  | |||
kernel/rcu/srcutiny.c (new file, 215 lines)
							|  | @ -0,0 +1,215 @@ | |||
| /*
 | ||||
|  * Sleepable Read-Copy Update mechanism for mutual exclusion, | ||||
|  *	tiny version for non-preemptible single-CPU use. | ||||
|  * | ||||
|  * This program is free software; you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation; either version 2 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * This program is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with this program; if not, you can access it online at | ||||
|  * http://www.gnu.org/licenses/gpl-2.0.html.
 | ||||
|  * | ||||
|  * Copyright (C) IBM Corporation, 2017 | ||||
|  * | ||||
|  * Author: Paul McKenney <paulmck@us.ibm.com> | ||||
|  */ | ||||
| 
 | ||||
| #include <linux/export.h> | ||||
| #include <linux/mutex.h> | ||||
| #include <linux/preempt.h> | ||||
| #include <linux/rcupdate_wait.h> | ||||
| #include <linux/sched.h> | ||||
| #include <linux/delay.h> | ||||
| #include <linux/srcu.h> | ||||
| 
 | ||||
| #include <linux/rcu_node_tree.h> | ||||
| #include "rcu.h" | ||||
| 
 | ||||
| static int init_srcu_struct_fields(struct srcu_struct *sp) | ||||
| { | ||||
| 	sp->srcu_lock_nesting[0] = 0; | ||||
| 	sp->srcu_lock_nesting[1] = 0; | ||||
| 	init_swait_queue_head(&sp->srcu_wq); | ||||
| 	sp->srcu_gp_seq = 0; | ||||
| 	rcu_segcblist_init(&sp->srcu_cblist); | ||||
| 	sp->srcu_gp_running = false; | ||||
| 	sp->srcu_gp_waiting = false; | ||||
| 	sp->srcu_idx = 0; | ||||
| 	INIT_WORK(&sp->srcu_work, srcu_drive_gp); | ||||
| 	return 0; | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| 
 | ||||
| int __init_srcu_struct(struct srcu_struct *sp, const char *name, | ||||
| 		       struct lock_class_key *key) | ||||
| { | ||||
| 	/* Don't re-initialize a lock while it is held. */ | ||||
| 	debug_check_no_locks_freed((void *)sp, sizeof(*sp)); | ||||
| 	lockdep_init_map(&sp->dep_map, name, key, 0); | ||||
| 	return init_srcu_struct_fields(sp); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(__init_srcu_struct); | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| 
 | ||||
| /*
 | ||||
|  * init_srcu_struct - initialize a sleep-RCU structure | ||||
|  * @sp: structure to initialize. | ||||
|  * | ||||
|  * Must invoke this on a given srcu_struct before passing that srcu_struct | ||||
|  * to any other function.  Each srcu_struct represents a separate domain | ||||
|  * of SRCU protection. | ||||
|  */ | ||||
| int init_srcu_struct(struct srcu_struct *sp) | ||||
| { | ||||
| 	return init_srcu_struct_fields(sp); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(init_srcu_struct); | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| 
 | ||||
| /*
 | ||||
|  * cleanup_srcu_struct - deconstruct a sleep-RCU structure | ||||
|  * @sp: structure to clean up. | ||||
|  * | ||||
|  * Must invoke this after you are finished using a given srcu_struct that | ||||
|  * was initialized via init_srcu_struct(), else you leak memory. | ||||
|  */ | ||||
| void cleanup_srcu_struct(struct srcu_struct *sp) | ||||
| { | ||||
| 	WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); | ||||
| 	flush_work(&sp->srcu_work); | ||||
| 	WARN_ON(rcu_seq_state(sp->srcu_gp_seq)); | ||||
| 	WARN_ON(sp->srcu_gp_running); | ||||
| 	WARN_ON(sp->srcu_gp_waiting); | ||||
| 	WARN_ON(!rcu_segcblist_empty(&sp->srcu_cblist)); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(cleanup_srcu_struct); | ||||
| 
 | ||||
| /*
 | ||||
|  * Counts the new reader in the appropriate element of the srcu_struct. | ||||
|  * Must be called from process context. | ||||
|  * Returns an index that must be passed to the matching srcu_read_unlock(). | ||||
|  */ | ||||
| int __srcu_read_lock(struct srcu_struct *sp) | ||||
| { | ||||
| 	int idx; | ||||
| 
 | ||||
| 	idx = READ_ONCE(sp->srcu_idx); | ||||
| 	WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1); | ||||
| 	return idx; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(__srcu_read_lock); | ||||
| 
 | ||||
| /*
 | ||||
|  * Removes the count for the old reader from the appropriate element of | ||||
|  * the srcu_struct.  Must be called from process context. | ||||
|  */ | ||||
| void __srcu_read_unlock(struct srcu_struct *sp, int idx) | ||||
| { | ||||
| 	int newval = sp->srcu_lock_nesting[idx] - 1; | ||||
| 
 | ||||
| 	WRITE_ONCE(sp->srcu_lock_nesting[idx], newval); | ||||
| 	if (!newval && READ_ONCE(sp->srcu_gp_waiting)) | ||||
| 		swake_up(&sp->srcu_wq); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(__srcu_read_unlock); | ||||
| 
 | ||||
| /*
 | ||||
|  * Workqueue handler to drive one grace period and invoke any callbacks | ||||
|  * that become ready as a result.  Single-CPU and !PREEMPT operation | ||||
|  * means that we get away with murder on synchronization.  ;-) | ||||
|  */ | ||||
| void srcu_drive_gp(struct work_struct *wp) | ||||
| { | ||||
| 	int idx; | ||||
| 	struct rcu_cblist ready_cbs; | ||||
| 	struct srcu_struct *sp; | ||||
| 	struct rcu_head *rhp; | ||||
| 
 | ||||
| 	sp = container_of(wp, struct srcu_struct, srcu_work); | ||||
| 	if (sp->srcu_gp_running || rcu_segcblist_empty(&sp->srcu_cblist)) | ||||
| 		return; /* Already running or nothing to do. */ | ||||
| 
 | ||||
| 	/* Tag recently arrived callbacks and wait for readers. */ | ||||
| 	WRITE_ONCE(sp->srcu_gp_running, true); | ||||
| 	rcu_segcblist_accelerate(&sp->srcu_cblist, | ||||
| 				 rcu_seq_snap(&sp->srcu_gp_seq)); | ||||
| 	rcu_seq_start(&sp->srcu_gp_seq); | ||||
| 	idx = sp->srcu_idx; | ||||
| 	WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); | ||||
| 	WRITE_ONCE(sp->srcu_gp_waiting, true);  /* srcu_read_unlock() wakes! */ | ||||
| 	swait_event(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); | ||||
| 	WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ | ||||
| 	rcu_seq_end(&sp->srcu_gp_seq); | ||||
| 
 | ||||
| 	/* Update callback list based on GP, and invoke ready callbacks. */ | ||||
| 	rcu_segcblist_advance(&sp->srcu_cblist, | ||||
| 			      rcu_seq_current(&sp->srcu_gp_seq)); | ||||
| 	if (rcu_segcblist_ready_cbs(&sp->srcu_cblist)) { | ||||
| 		rcu_cblist_init(&ready_cbs); | ||||
| 		local_irq_disable(); | ||||
| 		rcu_segcblist_extract_done_cbs(&sp->srcu_cblist, &ready_cbs); | ||||
| 		local_irq_enable(); | ||||
| 		rhp = rcu_cblist_dequeue(&ready_cbs); | ||||
| 		for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { | ||||
| 			local_bh_disable(); | ||||
| 			rhp->func(rhp); | ||||
| 			local_bh_enable(); | ||||
| 		} | ||||
| 		local_irq_disable(); | ||||
| 		rcu_segcblist_insert_count(&sp->srcu_cblist, &ready_cbs); | ||||
| 		local_irq_enable(); | ||||
| 	} | ||||
| 	WRITE_ONCE(sp->srcu_gp_running, false); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If more callbacks, reschedule ourselves.  This can race with | ||||
| 	 * a call_srcu() at interrupt level, but the ->srcu_gp_running | ||||
| 	 * checks will straighten that out. | ||||
| 	 */ | ||||
| 	if (!rcu_segcblist_empty(&sp->srcu_cblist)) | ||||
| 		schedule_work(&sp->srcu_work); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(srcu_drive_gp); | ||||
| 
 | ||||
| /*
 | ||||
|  * Enqueue an SRCU callback on the specified srcu_struct structure, | ||||
|  * initiating grace-period processing if it is not already running. | ||||
|  */ | ||||
| void call_srcu(struct srcu_struct *sp, struct rcu_head *head, | ||||
| 	       rcu_callback_t func) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	head->func = func; | ||||
| 	local_irq_save(flags); | ||||
| 	rcu_segcblist_enqueue(&sp->srcu_cblist, head, false); | ||||
| 	local_irq_restore(flags); | ||||
| 	if (!READ_ONCE(sp->srcu_gp_running)) | ||||
| 		schedule_work(&sp->srcu_work); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(call_srcu); | ||||
| 
 | ||||
| /*
 | ||||
|  * synchronize_srcu - wait for prior SRCU read-side critical-section completion | ||||
|  */ | ||||
| void synchronize_srcu(struct srcu_struct *sp) | ||||
| { | ||||
| 	struct rcu_synchronize rs; | ||||
| 
 | ||||
| 	init_rcu_head_on_stack(&rs.head); | ||||
| 	init_completion(&rs.completion); | ||||
| 	call_srcu(sp, &rs.head, wakeme_after_rcu); | ||||
| 	wait_for_completion(&rs.completion); | ||||
| 	destroy_rcu_head_on_stack(&rs.head); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(synchronize_srcu); | ||||
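call_srcu() above is the asynchronous counterpart of synchronize_srcu(): the caller embeds a struct rcu_head in its own object and reclaims that object from the callback. A hedged sketch, assuming a hypothetical struct foo and an update-side lock held by the caller; none of these names come from this patch:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>
	#include <linux/srcu.h>

	struct foo {
		struct rcu_head rh;
		int data;
	};

	static struct foo __rcu *global_foo;
	DEFINE_STATIC_SRCU(foo_srcu);

	static void foo_reclaim(struct rcu_head *rhp)
	{
		/* Runs only after all readers that might still see the old object. */
		kfree(container_of(rhp, struct foo, rh));
	}

	void foo_replace(struct foo *new_fp)	/* update-side lock assumed held */
	{
		struct foo *old_fp = rcu_dereference_protected(global_foo, 1);

		rcu_assign_pointer(global_foo, new_fp);
		if (old_fp)
			call_srcu(&foo_srcu, &old_fp->rh, foo_reclaim);	/* does not block */
	}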
kernel/rcu/srcutree.c (new file, 996 lines)
							|  | @ -0,0 +1,996 @@ | |||
| /*
 | ||||
|  * Sleepable Read-Copy Update mechanism for mutual exclusion. | ||||
|  * | ||||
|  * This program is free software; you can redistribute it and/or modify | ||||
|  * it under the terms of the GNU General Public License as published by | ||||
|  * the Free Software Foundation; either version 2 of the License, or | ||||
|  * (at your option) any later version. | ||||
|  * | ||||
|  * This program is distributed in the hope that it will be useful, | ||||
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|  * GNU General Public License for more details. | ||||
|  * | ||||
|  * You should have received a copy of the GNU General Public License | ||||
|  * along with this program; if not, you can access it online at | ||||
|  * http://www.gnu.org/licenses/gpl-2.0.html.
 | ||||
|  * | ||||
|  * Copyright (C) IBM Corporation, 2006 | ||||
|  * Copyright (C) Fujitsu, 2012 | ||||
|  * | ||||
|  * Author: Paul McKenney <paulmck@us.ibm.com> | ||||
|  *	   Lai Jiangshan <laijs@cn.fujitsu.com> | ||||
|  * | ||||
|  * For detailed explanation of Read-Copy Update mechanism see - | ||||
|  *		Documentation/RCU/ *.txt | ||||
|  * | ||||
|  */ | ||||
| 
 | ||||
| #include <linux/export.h> | ||||
| #include <linux/mutex.h> | ||||
| #include <linux/percpu.h> | ||||
| #include <linux/preempt.h> | ||||
| #include <linux/rcupdate_wait.h> | ||||
| #include <linux/sched.h> | ||||
| #include <linux/smp.h> | ||||
| #include <linux/delay.h> | ||||
| #include <linux/srcu.h> | ||||
| 
 | ||||
| #include "rcu.h" | ||||
| 
 | ||||
| static void srcu_invoke_callbacks(struct work_struct *work); | ||||
| static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); | ||||
| 
 | ||||
| /*
 | ||||
|  * Initialize SRCU combining tree.  Note that statically allocated | ||||
|  * srcu_struct structures might already have srcu_read_lock() and | ||||
|  * srcu_read_unlock() running against them.  So if the is_static parameter | ||||
|  * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. | ||||
|  */ | ||||
| static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) | ||||
| { | ||||
| 	int cpu; | ||||
| 	int i; | ||||
| 	int level = 0; | ||||
| 	int levelspread[RCU_NUM_LVLS]; | ||||
| 	struct srcu_data *sdp; | ||||
| 	struct srcu_node *snp; | ||||
| 	struct srcu_node *snp_first; | ||||
| 
 | ||||
| 	/* Work out the overall tree geometry. */ | ||||
| 	sp->level[0] = &sp->node[0]; | ||||
| 	for (i = 1; i < rcu_num_lvls; i++) | ||||
| 		sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1]; | ||||
| 	rcu_init_levelspread(levelspread, num_rcu_lvl); | ||||
| 
 | ||||
| 	/* Each pass through this loop initializes one srcu_node structure. */ | ||||
| 	rcu_for_each_node_breadth_first(sp, snp) { | ||||
| 		spin_lock_init(&snp->lock); | ||||
| 		for (i = 0; i < ARRAY_SIZE(snp->srcu_have_cbs); i++) | ||||
| 			snp->srcu_have_cbs[i] = 0; | ||||
| 		snp->grplo = -1; | ||||
| 		snp->grphi = -1; | ||||
| 		if (snp == &sp->node[0]) { | ||||
| 			/* Root node, special case. */ | ||||
| 			snp->srcu_parent = NULL; | ||||
| 			continue; | ||||
| 		} | ||||
| 
 | ||||
| 		/* Non-root node. */ | ||||
| 		if (snp == sp->level[level + 1]) | ||||
| 			level++; | ||||
| 		snp->srcu_parent = sp->level[level - 1] + | ||||
| 				   (snp - sp->level[level]) / | ||||
| 				   levelspread[level - 1]; | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Initialize the per-CPU srcu_data array, which feeds into the | ||||
| 	 * leaves of the srcu_node tree. | ||||
| 	 */ | ||||
| 	WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != | ||||
| 		     ARRAY_SIZE(sdp->srcu_unlock_count)); | ||||
| 	level = rcu_num_lvls - 1; | ||||
| 	snp_first = sp->level[level]; | ||||
| 	for_each_possible_cpu(cpu) { | ||||
| 		sdp = per_cpu_ptr(sp->sda, cpu); | ||||
| 		spin_lock_init(&sdp->lock); | ||||
| 		rcu_segcblist_init(&sdp->srcu_cblist); | ||||
| 		sdp->srcu_cblist_invoking = false; | ||||
| 		sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; | ||||
| 		sdp->mynode = &snp_first[cpu / levelspread[level]]; | ||||
| 		for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { | ||||
| 			if (snp->grplo < 0) | ||||
| 				snp->grplo = cpu; | ||||
| 			snp->grphi = cpu; | ||||
| 		} | ||||
| 		sdp->cpu = cpu; | ||||
| 		INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks); | ||||
| 		sdp->sp = sp; | ||||
| 		if (is_static) | ||||
| 			continue; | ||||
| 
 | ||||
| 		/* Dynamically allocated, better be no srcu_read_locks()! */ | ||||
| 		for (i = 0; i < ARRAY_SIZE(sdp->srcu_lock_count); i++) { | ||||
| 			sdp->srcu_lock_count[i] = 0; | ||||
| 			sdp->srcu_unlock_count[i] = 0; | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Initialize non-compile-time initialized fields, including the | ||||
|  * associated srcu_node and srcu_data structures.  The is_static | ||||
|  * parameter is passed through to init_srcu_struct_nodes(), and | ||||
|  * also tells us that ->sda has already been wired up to srcu_data. | ||||
|  */ | ||||
| static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static) | ||||
| { | ||||
| 	mutex_init(&sp->srcu_cb_mutex); | ||||
| 	mutex_init(&sp->srcu_gp_mutex); | ||||
| 	sp->srcu_idx = 0; | ||||
| 	sp->srcu_gp_seq = 0; | ||||
| 	atomic_set(&sp->srcu_exp_cnt, 0); | ||||
| 	sp->srcu_barrier_seq = 0; | ||||
| 	mutex_init(&sp->srcu_barrier_mutex); | ||||
| 	atomic_set(&sp->srcu_barrier_cpu_cnt, 0); | ||||
| 	INIT_DELAYED_WORK(&sp->work, process_srcu); | ||||
| 	if (!is_static) | ||||
| 		sp->sda = alloc_percpu(struct srcu_data); | ||||
| 	init_srcu_struct_nodes(sp, is_static); | ||||
| 	smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */ | ||||
| 	return sp->sda ? 0 : -ENOMEM; | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| 
 | ||||
| int __init_srcu_struct(struct srcu_struct *sp, const char *name, | ||||
| 		       struct lock_class_key *key) | ||||
| { | ||||
| 	/* Don't re-initialize a lock while it is held. */ | ||||
| 	debug_check_no_locks_freed((void *)sp, sizeof(*sp)); | ||||
| 	lockdep_init_map(&sp->dep_map, name, key, 0); | ||||
| 	spin_lock_init(&sp->gp_lock); | ||||
| 	return init_srcu_struct_fields(sp, false); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(__init_srcu_struct); | ||||
| 
 | ||||
| #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| 
 | ||||
| /**
 | ||||
|  * init_srcu_struct - initialize a sleep-RCU structure | ||||
|  * @sp: structure to initialize. | ||||
|  * | ||||
|  * Must invoke this on a given srcu_struct before passing that srcu_struct | ||||
|  * to any other function.  Each srcu_struct represents a separate domain | ||||
|  * of SRCU protection. | ||||
|  */ | ||||
| int init_srcu_struct(struct srcu_struct *sp) | ||||
| { | ||||
| 	spin_lock_init(&sp->gp_lock); | ||||
| 	return init_srcu_struct_fields(sp, false); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(init_srcu_struct); | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| 
 | ||||
| /*
 | ||||
|  * First-use initialization of statically allocated srcu_struct | ||||
|  * structure.  Wiring up the combining tree is more than can be | ||||
|  * done with compile-time initialization, so this check is added | ||||
|  * to each update-side SRCU primitive.  Use ->gp_lock, which -is- | ||||
|  * compile-time initialized, to resolve races involving multiple | ||||
|  * CPUs trying to garner first-use privileges. | ||||
|  */ | ||||
| static void check_init_srcu_struct(struct srcu_struct *sp) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 
 | ||||
| 	WARN_ON_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INIT); | ||||
| 	/* The smp_load_acquire() pairs with the smp_store_release(). */ | ||||
| 	if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ | ||||
| 		return; /* Already initialized. */ | ||||
| 	spin_lock_irqsave(&sp->gp_lock, flags); | ||||
| 	if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { | ||||
| 		spin_unlock_irqrestore(&sp->gp_lock, flags); | ||||
| 		return; | ||||
| 	} | ||||
| 	init_srcu_struct_fields(sp, true); | ||||
| 	spin_unlock_irqrestore(&sp->gp_lock, flags); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Returns approximate total of the readers' ->srcu_lock_count[] values | ||||
|  * for the rank of per-CPU counters specified by idx. | ||||
|  */ | ||||
| static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) | ||||
| { | ||||
| 	int cpu; | ||||
| 	unsigned long sum = 0; | ||||
| 
 | ||||
| 	for_each_possible_cpu(cpu) { | ||||
| 		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); | ||||
| 
 | ||||
| 		sum += READ_ONCE(cpuc->srcu_lock_count[idx]); | ||||
| 	} | ||||
| 	return sum; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Returns approximate total of the readers' ->srcu_unlock_count[] values | ||||
|  * for the rank of per-CPU counters specified by idx. | ||||
|  */ | ||||
| static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) | ||||
| { | ||||
| 	int cpu; | ||||
| 	unsigned long sum = 0; | ||||
| 
 | ||||
| 	for_each_possible_cpu(cpu) { | ||||
| 		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); | ||||
| 
 | ||||
| 		sum += READ_ONCE(cpuc->srcu_unlock_count[idx]); | ||||
| 	} | ||||
| 	return sum; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Return true if the number of pre-existing readers is determined to | ||||
|  * be zero. | ||||
|  */ | ||||
| static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) | ||||
| { | ||||
| 	unsigned long unlocks; | ||||
| 
 | ||||
| 	unlocks = srcu_readers_unlock_idx(sp, idx); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Make sure that a lock is always counted if the corresponding | ||||
| 	 * unlock is counted. Needs to be a smp_mb() as the read side may | ||||
| 	 * contain a read from a variable that is written to before the | ||||
| 	 * synchronize_srcu() in the write side. In this case smp_mb()s | ||||
| 	 * A and B act like the store buffering pattern. | ||||
| 	 * | ||||
| 	 * This smp_mb() also pairs with smp_mb() C to prevent accesses | ||||
| 	 * after the synchronize_srcu() from being executed before the | ||||
| 	 * grace period ends. | ||||
| 	 */ | ||||
| 	smp_mb(); /* A */ | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * If the locks are the same as the unlocks, then there must have | ||||
| 	 * been no readers on this index at some time in between. This does | ||||
| 	 * not mean that there are no more readers, as one could have read | ||||
| 	 * the current index but not have incremented the lock counter yet. | ||||
| 	 * | ||||
| 	 * Possible bug: There is no guarantee that there haven't been | ||||
| 	 * ULONG_MAX increments of ->srcu_lock_count[] since the unlocks were | ||||
| 	 * counted, meaning that this could return true even if there are | ||||
| 	 * still active readers.  Since there are no memory barriers around | ||||
| 	 * srcu_flip(), the CPU is not required to increment ->srcu_idx | ||||
| 	 * before running srcu_readers_unlock_idx(), which means that there | ||||
| 	 * could be an arbitrarily large number of critical sections that | ||||
| 	 * execute after srcu_readers_unlock_idx() but use the old value | ||||
| 	 * of ->srcu_idx. | ||||
| 	 */ | ||||
| 	return srcu_readers_lock_idx(sp, idx) == unlocks; | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * srcu_readers_active - returns true if there are readers, and false | ||||
|  *                       otherwise | ||||
|  * @sp: which srcu_struct to count active readers (holding srcu_read_lock). | ||||
|  * | ||||
|  * Note that this is not an atomic primitive, and can therefore suffer | ||||
|  * severe errors when invoked on an active srcu_struct.  That said, it | ||||
|  * can be useful as an error check at cleanup time. | ||||
|  */ | ||||
| static bool srcu_readers_active(struct srcu_struct *sp) | ||||
| { | ||||
| 	int cpu; | ||||
| 	unsigned long sum = 0; | ||||
| 
 | ||||
| 	for_each_possible_cpu(cpu) { | ||||
| 		struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); | ||||
| 
 | ||||
| 		sum += READ_ONCE(cpuc->srcu_lock_count[0]); | ||||
| 		sum += READ_ONCE(cpuc->srcu_lock_count[1]); | ||||
| 		sum -= READ_ONCE(cpuc->srcu_unlock_count[0]); | ||||
| 		sum -= READ_ONCE(cpuc->srcu_unlock_count[1]); | ||||
| 	} | ||||
| 	return sum; | ||||
| } | ||||
| 
 | ||||
| #define SRCU_INTERVAL		1 | ||||
| 
 | ||||
| /**
 | ||||
|  * cleanup_srcu_struct - deconstruct a sleep-RCU structure | ||||
|  * @sp: structure to clean up. | ||||
|  * | ||||
|  * Must invoke this after you are finished using a given srcu_struct that | ||||
|  * was initialized via init_srcu_struct(), else you leak memory. | ||||
|  */ | ||||
| void cleanup_srcu_struct(struct srcu_struct *sp) | ||||
| { | ||||
| 	int cpu; | ||||
| 
 | ||||
| 	WARN_ON_ONCE(atomic_read(&sp->srcu_exp_cnt)); | ||||
| 	if (WARN_ON(srcu_readers_active(sp))) | ||||
| 		return; /* Leakage unless caller handles error. */ | ||||
| 	flush_delayed_work(&sp->work); | ||||
| 	for_each_possible_cpu(cpu) | ||||
| 		flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); | ||||
| 	if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || | ||||
| 	    WARN_ON(srcu_readers_active(sp))) { | ||||
| 		pr_info("cleanup_srcu_struct: Active srcu_struct %p state: %d\n", sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); | ||||
| 		return; /* Caller forgot to stop doing call_srcu()? */ | ||||
| 	} | ||||
| 	free_percpu(sp->sda); | ||||
| 	sp->sda = NULL; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(cleanup_srcu_struct); | ||||
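| 
| /* | ||||
|  * Minimal teardown sketch, not part of this file; my_srcu and my_cleanup() | ||||
|  * are assumed names.  The point is the ordering: stop posting new | ||||
|  * callbacks, let srcu_barrier() drain those already posted, then clean up. | ||||
|  * | ||||
|  *	static struct srcu_struct my_srcu;	(set up via init_srcu_struct()) | ||||
|  * | ||||
|  *	static void my_cleanup(void) | ||||
|  *	{ | ||||
|  *		srcu_barrier(&my_srcu);		wait for posted callbacks | ||||
|  *		cleanup_srcu_struct(&my_srcu);	then release per-CPU state | ||||
|  *	} | ||||
|  */ | ||||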
| 
 | ||||
| /*
 | ||||
|  * Counts the new reader in the appropriate per-CPU element of the | ||||
|  * srcu_struct.  Must be called from process context. | ||||
|  * Returns an index that must be passed to the matching srcu_read_unlock(). | ||||
|  */ | ||||
| int __srcu_read_lock(struct srcu_struct *sp) | ||||
| { | ||||
| 	int idx; | ||||
| 
 | ||||
| 	idx = READ_ONCE(sp->srcu_idx) & 0x1; | ||||
| 	__this_cpu_inc(sp->sda->srcu_lock_count[idx]); | ||||
| 	smp_mb(); /* B */  /* Avoid leaking the critical section. */ | ||||
| 	return idx; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(__srcu_read_lock); | ||||
| 
 | ||||
| /*
 | ||||
|  * Removes the count for the old reader from the appropriate per-CPU | ||||
|  * element of the srcu_struct.  Note that this may well be a different | ||||
|  * CPU than that which was incremented by the corresponding srcu_read_lock(). | ||||
|  * Must be called from process context. | ||||
|  */ | ||||
| void __srcu_read_unlock(struct srcu_struct *sp, int idx) | ||||
| { | ||||
| 	smp_mb(); /* C */  /* Avoid leaking the critical section. */ | ||||
| 	this_cpu_inc(sp->sda->srcu_unlock_count[idx]); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(__srcu_read_unlock); | ||||
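| 
| /* | ||||
|  * Typical read-side usage sketch; my_srcu, gp, p and do_something_with() | ||||
|  * are assumed names.  Readers normally use the srcu_read_lock() and | ||||
|  * srcu_read_unlock() wrappers rather than these __ variants: | ||||
|  * | ||||
|  *	int idx; | ||||
|  * | ||||
|  *	idx = srcu_read_lock(&my_srcu); | ||||
|  *	p = srcu_dereference(gp, &my_srcu); | ||||
|  *	do_something_with(p);			may sleep, unlike RCU | ||||
|  *	srcu_read_unlock(&my_srcu, idx); | ||||
|  */ | ||||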
| 
 | ||||
| /*
 | ||||
|  * We use an adaptive strategy for synchronize_srcu() and especially for | ||||
|  * synchronize_srcu_expedited().  We spin for a fixed time period | ||||
|  * (defined below) to allow SRCU readers to exit their read-side critical | ||||
|  * sections.  If there are still some readers after a few microseconds, | ||||
|  * we repeatedly block for 1-millisecond time periods. | ||||
|  */ | ||||
| #define SRCU_RETRY_CHECK_DELAY		5 | ||||
| 
 | ||||
| /*
 | ||||
|  * Start an SRCU grace period. | ||||
|  */ | ||||
| static void srcu_gp_start(struct srcu_struct *sp) | ||||
| { | ||||
| 	struct srcu_data *sdp = this_cpu_ptr(sp->sda); | ||||
| 	int state; | ||||
| 
 | ||||
| 	RCU_LOCKDEP_WARN(!lockdep_is_held(&sp->gp_lock), | ||||
| 			 "Invoked srcu_gp_start() without ->gp_lock!"); | ||||
| 	WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); | ||||
| 	rcu_segcblist_advance(&sdp->srcu_cblist, | ||||
| 			      rcu_seq_current(&sp->srcu_gp_seq)); | ||||
| 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, | ||||
| 				       rcu_seq_snap(&sp->srcu_gp_seq)); | ||||
| 	rcu_seq_start(&sp->srcu_gp_seq); | ||||
| 	state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); | ||||
| 	WARN_ON_ONCE(state != SRCU_STATE_SCAN1); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Track online CPUs to guide callback workqueue placement. | ||||
|  */ | ||||
| DEFINE_PER_CPU(bool, srcu_online); | ||||
| 
 | ||||
| void srcu_online_cpu(unsigned int cpu) | ||||
| { | ||||
| 	WRITE_ONCE(per_cpu(srcu_online, cpu), true); | ||||
| } | ||||
| 
 | ||||
| void srcu_offline_cpu(unsigned int cpu) | ||||
| { | ||||
| 	WRITE_ONCE(per_cpu(srcu_online, cpu), false); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Place the workqueue handler on the specified CPU if online, otherwise | ||||
|  * just run it wherever.  This is useful for placing workqueue handlers | ||||
|  * that are to invoke the specified CPU's callbacks. | ||||
|  */ | ||||
| static bool srcu_queue_delayed_work_on(int cpu, struct workqueue_struct *wq, | ||||
| 				       struct delayed_work *dwork, | ||||
| 				       unsigned long delay) | ||||
| { | ||||
| 	bool ret; | ||||
| 
 | ||||
| 	preempt_disable(); | ||||
| 	if (READ_ONCE(per_cpu(srcu_online, cpu))) | ||||
| 		ret = queue_delayed_work_on(cpu, wq, dwork, delay); | ||||
| 	else | ||||
| 		ret = queue_delayed_work(wq, dwork, delay); | ||||
| 	preempt_enable(); | ||||
| 	return ret; | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Schedule callback invocation for the specified srcu_data structure, | ||||
|  * if possible, on the corresponding CPU. | ||||
|  */ | ||||
| static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) | ||||
| { | ||||
| 	srcu_queue_delayed_work_on(sdp->cpu, system_power_efficient_wq, | ||||
| 				   &sdp->work, delay); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Schedule callback invocation for all srcu_data structures associated | ||||
|  * with the specified srcu_node structure, if possible, on the corresponding | ||||
|  * CPUs. | ||||
|  */ | ||||
| static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp) | ||||
| { | ||||
| 	int cpu; | ||||
| 
 | ||||
| 	for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) | ||||
| 		srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), | ||||
| 				      atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Note the end of an SRCU grace period.  Initiates callback invocation | ||||
|  * and starts a new grace period if needed. | ||||
|  * | ||||
|  * The ->srcu_cb_mutex acquisition does not protect any data, but | ||||
|  * instead prevents more than one grace period from starting while we | ||||
|  * are initiating callback invocation.  This allows the ->srcu_have_cbs[] | ||||
|  * array to have a finite number of elements. | ||||
|  */ | ||||
| static void srcu_gp_end(struct srcu_struct *sp) | ||||
| { | ||||
| 	bool cbs; | ||||
| 	unsigned long gpseq; | ||||
| 	int idx; | ||||
| 	int idxnext; | ||||
| 	struct srcu_node *snp; | ||||
| 
 | ||||
| 	/* Prevent more than one additional grace period. */ | ||||
| 	mutex_lock(&sp->srcu_cb_mutex); | ||||
| 
 | ||||
| 	/* End the current grace period. */ | ||||
| 	spin_lock_irq(&sp->gp_lock); | ||||
| 	idx = rcu_seq_state(sp->srcu_gp_seq); | ||||
| 	WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); | ||||
| 	rcu_seq_end(&sp->srcu_gp_seq); | ||||
| 	gpseq = rcu_seq_current(&sp->srcu_gp_seq); | ||||
| 	spin_unlock_irq(&sp->gp_lock); | ||||
| 	mutex_unlock(&sp->srcu_gp_mutex); | ||||
| 	/* A new grace period can start at this point.  But only one. */ | ||||
| 
 | ||||
| 	/* Initiate callback invocation as needed. */ | ||||
| 	idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); | ||||
| 	idxnext = (idx + 1) % ARRAY_SIZE(snp->srcu_have_cbs); | ||||
| 	rcu_for_each_node_breadth_first(sp, snp) { | ||||
| 		spin_lock_irq(&snp->lock); | ||||
| 		cbs = false; | ||||
| 		if (snp >= sp->level[rcu_num_lvls - 1]) | ||||
| 			cbs = snp->srcu_have_cbs[idx] == gpseq; | ||||
| 		snp->srcu_have_cbs[idx] = gpseq; | ||||
| 		rcu_seq_set_state(&snp->srcu_have_cbs[idx], 1); | ||||
| 		spin_unlock_irq(&snp->lock); | ||||
| 		if (cbs) { | ||||
| 			smp_mb(); /* GP end before CB invocation. */ | ||||
| 			srcu_schedule_cbs_snp(sp, snp); | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	/* Callback initiation done, allow grace periods after next. */ | ||||
| 	mutex_unlock(&sp->srcu_cb_mutex); | ||||
| 
 | ||||
| 	/* Start a new grace period if needed. */ | ||||
| 	spin_lock_irq(&sp->gp_lock); | ||||
| 	gpseq = rcu_seq_current(&sp->srcu_gp_seq); | ||||
| 	if (!rcu_seq_state(gpseq) && | ||||
| 	    ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { | ||||
| 		srcu_gp_start(sp); | ||||
| 		spin_unlock_irq(&sp->gp_lock); | ||||
| 		/* Throttle expedited grace periods: Should be rare! */ | ||||
| 		srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) && | ||||
| 				    rcu_seq_ctr(gpseq) & 0xf | ||||
| 				    ? 0 | ||||
| 				    : SRCU_INTERVAL); | ||||
| 	} else { | ||||
| 		spin_unlock_irq(&sp->gp_lock); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Funnel-locking scheme to scalably mediate many concurrent grace-period | ||||
|  * requests.  The winner has to do the work of actually starting grace | ||||
|  * period s.  Losers must either ensure that their desired grace-period | ||||
|  * number is recorded on at least their leaf srcu_node structure, or they | ||||
|  * must take steps to invoke their own callbacks. | ||||
|  */ | ||||
| static void srcu_funnel_gp_start(struct srcu_struct *sp, | ||||
| 				 struct srcu_data *sdp, | ||||
| 				 unsigned long s) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 	int idx = rcu_seq_ctr(s) % ARRAY_SIZE(sdp->mynode->srcu_have_cbs); | ||||
| 	struct srcu_node *snp = sdp->mynode; | ||||
| 	unsigned long snp_seq; | ||||
| 
 | ||||
| 	/* Each pass through the loop does one level of the srcu_node tree. */ | ||||
| 	for (; snp != NULL; snp = snp->srcu_parent) { | ||||
| 		if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) | ||||
| 			return; /* GP already done and CBs recorded. */ | ||||
| 		spin_lock_irqsave(&snp->lock, flags); | ||||
| 		if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { | ||||
| 			snp_seq = snp->srcu_have_cbs[idx]; | ||||
| 			spin_unlock_irqrestore(&snp->lock, flags); | ||||
| 			if (snp == sdp->mynode && snp_seq != s) { | ||||
| 				smp_mb(); /* CBs after GP! */ | ||||
| 				srcu_schedule_cbs_sdp(sdp, 0); | ||||
| 			} | ||||
| 			return; | ||||
| 		} | ||||
| 		snp->srcu_have_cbs[idx] = s; | ||||
| 		spin_unlock_irqrestore(&snp->lock, flags); | ||||
| 	} | ||||
| 
 | ||||
| 	/* Top of tree, must ensure the grace period will be started. */ | ||||
| 	spin_lock_irqsave(&sp->gp_lock, flags); | ||||
| 	if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { | ||||
| 		/*
 | ||||
| 		 * Record need for grace period s.  Pair with load | ||||
| 		 * acquire setting up for initialization. | ||||
| 		 */ | ||||
| 		smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/ | ||||
| 	} | ||||
| 
 | ||||
| 	/* If grace period not already done and none in progress, start it. */ | ||||
| 	if (!rcu_seq_done(&sp->srcu_gp_seq, s) && | ||||
| 	    rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) { | ||||
| 		WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); | ||||
| 		srcu_gp_start(sp); | ||||
| 		queue_delayed_work(system_power_efficient_wq, &sp->work, | ||||
| 				   atomic_read(&sp->srcu_exp_cnt) | ||||
| 				   ? 0 | ||||
| 				   : SRCU_INTERVAL); | ||||
| 	} | ||||
| 	spin_unlock_irqrestore(&sp->gp_lock, flags); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Wait until all readers counted by array index idx complete, but | ||||
|  * loop an additional time if there is an expedited grace period pending. | ||||
|  * The caller must ensure that ->srcu_idx is not changed while checking. | ||||
|  */ | ||||
| static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) | ||||
| { | ||||
| 	for (;;) { | ||||
| 		if (srcu_readers_active_idx_check(sp, idx)) | ||||
| 			return true; | ||||
| 		if (--trycount + !!atomic_read(&sp->srcu_exp_cnt) <= 0) | ||||
| 			return false; | ||||
| 		udelay(SRCU_RETRY_CHECK_DELAY); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Increment the ->srcu_idx counter so that future SRCU readers will | ||||
|  * use the other rank of the ->srcu_(un)lock_count[] arrays.  This allows | ||||
|  * us to wait for pre-existing readers in a starvation-free manner. | ||||
|  */ | ||||
| static void srcu_flip(struct srcu_struct *sp) | ||||
| { | ||||
| 	WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Ensure that if the updater misses an __srcu_read_unlock() | ||||
| 	 * increment, that task's next __srcu_read_lock() will see the | ||||
| 	 * above counter update.  Note that both this memory barrier | ||||
| 	 * and the one in srcu_readers_active_idx_check() provide the | ||||
| 	 * guarantee for __srcu_read_lock(). | ||||
| 	 */ | ||||
| 	smp_mb(); /* D */  /* Pairs with C. */ | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Enqueue an SRCU callback on the srcu_data structure associated with | ||||
|  * the current CPU and the specified srcu_struct structure, initiating | ||||
|  * grace-period processing if it is not already running. | ||||
|  * | ||||
|  * Note that all CPUs must agree that the grace period extended beyond | ||||
|  * all pre-existing SRCU read-side critical sections.  On systems with | ||||
|  * more than one CPU, this means that when "func()" is invoked, each CPU | ||||
|  * is guaranteed to have executed a full memory barrier since the end of | ||||
|  * its last corresponding SRCU read-side critical section whose beginning | ||||
|  * preceded the call to call_srcu().  It also means that each CPU executing | ||||
|  * an SRCU read-side critical section that continues beyond the start of | ||||
|  * "func()" must have executed a memory barrier after the call_rcu() | ||||
|  * but before the beginning of that SRCU read-side critical section. | ||||
|  * Note that these guarantees include CPUs that are offline, idle, or | ||||
|  * executing in user mode, as well as CPUs that are executing in the kernel. | ||||
|  * | ||||
|  * Furthermore, if CPU A invoked call_srcu() and CPU B invoked the | ||||
|  * resulting SRCU callback function "func()", then both CPU A and CPU | ||||
|  * B are guaranteed to execute a full memory barrier during the time | ||||
|  * interval between the call to call_srcu() and the invocation of "func()". | ||||
|  * This guarantee applies even if CPU A and CPU B are the same CPU (but | ||||
|  * again only if the system has more than one CPU). | ||||
|  * | ||||
|  * Of course, these guarantees apply only for invocations of call_srcu(), | ||||
|  * srcu_read_lock(), and srcu_read_unlock() that are all passed the same | ||||
|  * srcu_struct structure. | ||||
|  */ | ||||
| void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, | ||||
| 	       rcu_callback_t func) | ||||
| { | ||||
| 	unsigned long flags; | ||||
| 	bool needgp = false; | ||||
| 	unsigned long s; | ||||
| 	struct srcu_data *sdp; | ||||
| 
 | ||||
| 	check_init_srcu_struct(sp); | ||||
| 	rhp->func = func; | ||||
| 	local_irq_save(flags); | ||||
| 	sdp = this_cpu_ptr(sp->sda); | ||||
| 	spin_lock(&sdp->lock); | ||||
| 	rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); | ||||
| 	rcu_segcblist_advance(&sdp->srcu_cblist, | ||||
| 			      rcu_seq_current(&sp->srcu_gp_seq)); | ||||
| 	s = rcu_seq_snap(&sp->srcu_gp_seq); | ||||
| 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); | ||||
| 	if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { | ||||
| 		sdp->srcu_gp_seq_needed = s; | ||||
| 		needgp = true; | ||||
| 	} | ||||
| 	spin_unlock_irqrestore(&sdp->lock, flags); | ||||
| 	if (needgp) | ||||
| 		srcu_funnel_gp_start(sp, sdp, s); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(call_srcu); | ||||
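| 
| /* | ||||
|  * Minimal call_srcu() usage sketch; struct foo, foo_reclaim(), my_srcu | ||||
|  * and old_fp are assumed names, not part of this file: | ||||
|  * | ||||
|  *	struct foo { | ||||
|  *		struct rcu_head rh; | ||||
|  *		int data; | ||||
|  *	}; | ||||
|  * | ||||
|  *	static void foo_reclaim(struct rcu_head *rhp) | ||||
|  *	{ | ||||
|  *		kfree(container_of(rhp, struct foo, rh)); | ||||
|  *	} | ||||
|  * | ||||
|  *	call_srcu(&my_srcu, &old_fp->rh, foo_reclaim); | ||||
|  */ | ||||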
| 
 | ||||
| /*
 | ||||
|  * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). | ||||
|  */ | ||||
| static void __synchronize_srcu(struct srcu_struct *sp) | ||||
| { | ||||
| 	struct rcu_synchronize rcu; | ||||
| 
 | ||||
| 	RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) || | ||||
| 			 lock_is_held(&rcu_bh_lock_map) || | ||||
| 			 lock_is_held(&rcu_lock_map) || | ||||
| 			 lock_is_held(&rcu_sched_lock_map), | ||||
| 			 "Illegal synchronize_srcu() in same-type SRCU (or in RCU) read-side critical section"); | ||||
| 
 | ||||
| 	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) | ||||
| 		return; | ||||
| 	might_sleep(); | ||||
| 	check_init_srcu_struct(sp); | ||||
| 	init_completion(&rcu.completion); | ||||
| 	init_rcu_head_on_stack(&rcu.head); | ||||
| 	call_srcu(sp, &rcu.head, wakeme_after_rcu); | ||||
| 	wait_for_completion(&rcu.completion); | ||||
| 	destroy_rcu_head_on_stack(&rcu.head); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * synchronize_srcu_expedited - Brute-force SRCU grace period | ||||
|  * @sp: srcu_struct with which to synchronize. | ||||
|  * | ||||
|  * Wait for an SRCU grace period to elapse, but be more aggressive about | ||||
|  * spinning rather than blocking when waiting. | ||||
|  * | ||||
|  * Note that synchronize_srcu_expedited() has the same deadlock and | ||||
|  * memory-ordering properties as does synchronize_srcu(). | ||||
|  */ | ||||
| void synchronize_srcu_expedited(struct srcu_struct *sp) | ||||
| { | ||||
| 	bool do_norm = rcu_gp_is_normal(); | ||||
| 
 | ||||
| 	check_init_srcu_struct(sp); | ||||
| 	if (!do_norm) { | ||||
| 		atomic_inc(&sp->srcu_exp_cnt); | ||||
| 		smp_mb__after_atomic(); /* increment before GP. */ | ||||
| 	} | ||||
| 	__synchronize_srcu(sp); | ||||
| 	if (!do_norm) { | ||||
| 		smp_mb__before_atomic(); /* GP before decrement. */ | ||||
| 		WARN_ON_ONCE(atomic_dec_return(&sp->srcu_exp_cnt) < 0); | ||||
| 	} | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); | ||||
| 
 | ||||
| /**
 | ||||
|  * synchronize_srcu - wait for prior SRCU read-side critical-section completion | ||||
|  * @sp: srcu_struct with which to synchronize. | ||||
|  * | ||||
|  * Wait for the counts of both index ranks to drain to zero.  To avoid | ||||
|  * possible starvation of synchronize_srcu(), it first waits for the count | ||||
|  * of index ((->srcu_idx & 1) ^ 1) to drain to zero, and then flips | ||||
|  * ->srcu_idx and waits for the count of the other index to drain. | ||||
|  * | ||||
|  * Can block; must be called from process context. | ||||
|  * | ||||
|  * Note that it is illegal to call synchronize_srcu() from the corresponding | ||||
|  * SRCU read-side critical section; doing so will result in deadlock. | ||||
|  * However, it is perfectly legal to call synchronize_srcu() on one | ||||
|  * srcu_struct from some other srcu_struct's read-side critical section, | ||||
|  * as long as the resulting graph of srcu_structs is acyclic. | ||||
|  * | ||||
|  * There are memory-ordering constraints implied by synchronize_srcu(). | ||||
|  * On systems with more than one CPU, when synchronize_srcu() returns, | ||||
|  * each CPU is guaranteed to have executed a full memory barrier since | ||||
|  * the end of its last corresponding SRCU read-side critical section | ||||
|  * whose beginning preceded the call to synchronize_srcu().  In addition, | ||||
|  * each CPU having an SRCU read-side critical section that extends beyond | ||||
|  * the return from synchronize_srcu() is guaranteed to have executed a | ||||
|  * full memory barrier after the beginning of synchronize_srcu() and before | ||||
|  * the beginning of that SRCU read-side critical section.  Note that these | ||||
|  * guarantees include CPUs that are offline, idle, or executing in user mode, | ||||
|  * as well as CPUs that are executing in the kernel. | ||||
|  * | ||||
|  * Furthermore, if CPU A invoked synchronize_srcu(), which returned | ||||
|  * to its caller on CPU B, then both CPU A and CPU B are guaranteed | ||||
|  * to have executed a full memory barrier during the execution of | ||||
|  * synchronize_srcu().  This guarantee applies even if CPU A and CPU B | ||||
|  * are the same CPU, but again only if the system has more than one CPU. | ||||
|  * | ||||
|  * Of course, these memory-ordering guarantees apply only when | ||||
|  * synchronize_srcu(), srcu_read_lock(), and srcu_read_unlock() are | ||||
|  * passed the same srcu_struct structure. | ||||
|  */ | ||||
| void synchronize_srcu(struct srcu_struct *sp) | ||||
| { | ||||
| 	if (rcu_gp_is_expedited()) | ||||
| 		synchronize_srcu_expedited(sp); | ||||
| 	else | ||||
| 		__synchronize_srcu(sp); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(synchronize_srcu); | ||||
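| 
| /* | ||||
|  * Typical updater sketch; gp, new, old, my_lock and my_srcu are assumed | ||||
|  * names.  Publish the new version, wait for pre-existing SRCU readers, | ||||
|  * then free the old version: | ||||
|  * | ||||
|  *	spin_lock(&my_lock); | ||||
|  *	old = rcu_dereference_protected(gp, lockdep_is_held(&my_lock)); | ||||
|  *	rcu_assign_pointer(gp, new); | ||||
|  *	spin_unlock(&my_lock); | ||||
|  *	synchronize_srcu(&my_srcu); | ||||
|  *	kfree(old); | ||||
|  */ | ||||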
| 
 | ||||
| /*
 | ||||
|  * Callback function for srcu_barrier() use. | ||||
|  */ | ||||
| static void srcu_barrier_cb(struct rcu_head *rhp) | ||||
| { | ||||
| 	struct srcu_data *sdp; | ||||
| 	struct srcu_struct *sp; | ||||
| 
 | ||||
| 	sdp = container_of(rhp, struct srcu_data, srcu_barrier_head); | ||||
| 	sp = sdp->sp; | ||||
| 	if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) | ||||
| 		complete(&sp->srcu_barrier_completion); | ||||
| } | ||||
| 
 | ||||
| /**
 | ||||
|  * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. | ||||
|  * @sp: srcu_struct on which to wait for in-flight callbacks. | ||||
|  */ | ||||
| void srcu_barrier(struct srcu_struct *sp) | ||||
| { | ||||
| 	int cpu; | ||||
| 	struct srcu_data *sdp; | ||||
| 	unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq); | ||||
| 
 | ||||
| 	check_init_srcu_struct(sp); | ||||
| 	mutex_lock(&sp->srcu_barrier_mutex); | ||||
| 	if (rcu_seq_done(&sp->srcu_barrier_seq, s)) { | ||||
| 		smp_mb(); /* Force ordering following return. */ | ||||
| 		mutex_unlock(&sp->srcu_barrier_mutex); | ||||
| 		return; /* Someone else did our work for us. */ | ||||
| 	} | ||||
| 	rcu_seq_start(&sp->srcu_barrier_seq); | ||||
| 	init_completion(&sp->srcu_barrier_completion); | ||||
| 
 | ||||
| 	/* Initial count prevents reaching zero until all CBs are posted. */ | ||||
| 	atomic_set(&sp->srcu_barrier_cpu_cnt, 1); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Each pass through this loop enqueues a callback, but only | ||||
| 	 * on CPUs already having callbacks enqueued.  Note that if | ||||
| 	 * a CPU already has callbacks enqueued, it must have already | ||||
| 	 * registered the need for a future grace period, so all we | ||||
| 	 * need do is enqueue a callback that will use the same | ||||
| 	 * grace period as the last callback already in the queue. | ||||
| 	 */ | ||||
| 	for_each_possible_cpu(cpu) { | ||||
| 		sdp = per_cpu_ptr(sp->sda, cpu); | ||||
| 		spin_lock_irq(&sdp->lock); | ||||
| 		atomic_inc(&sp->srcu_barrier_cpu_cnt); | ||||
| 		sdp->srcu_barrier_head.func = srcu_barrier_cb; | ||||
| 		if (!rcu_segcblist_entrain(&sdp->srcu_cblist, | ||||
| 					   &sdp->srcu_barrier_head, 0)) | ||||
| 			atomic_dec(&sp->srcu_barrier_cpu_cnt); | ||||
| 		spin_unlock_irq(&sdp->lock); | ||||
| 	} | ||||
| 
 | ||||
| 	/* Remove the initial count, at which point reaching zero can happen. */ | ||||
| 	if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) | ||||
| 		complete(&sp->srcu_barrier_completion); | ||||
| 	wait_for_completion(&sp->srcu_barrier_completion); | ||||
| 
 | ||||
| 	rcu_seq_end(&sp->srcu_barrier_seq); | ||||
| 	mutex_unlock(&sp->srcu_barrier_mutex); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(srcu_barrier); | ||||
| 
 | ||||
| /**
 | ||||
|  * srcu_batches_completed - return batches completed. | ||||
|  * @sp: srcu_struct on which to report batch completion. | ||||
|  * | ||||
|  * Report the number of batches, correlated with, but not necessarily | ||||
|  * precisely the same as, the number of grace periods that have elapsed. | ||||
|  */ | ||||
| unsigned long srcu_batches_completed(struct srcu_struct *sp) | ||||
| { | ||||
| 	return sp->srcu_idx; | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(srcu_batches_completed); | ||||
| 
 | ||||
| /*
 | ||||
|  * Core SRCU state machine.  Push state bits of ->srcu_gp_seq | ||||
|  * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has | ||||
|  * completed in that state. | ||||
|  */ | ||||
| static void srcu_advance_state(struct srcu_struct *sp) | ||||
| { | ||||
| 	int idx; | ||||
| 
 | ||||
| 	mutex_lock(&sp->srcu_gp_mutex); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Because readers might be delayed for an extended period after | ||||
| 	 * fetching ->srcu_idx for their index, at any point in time there | ||||
| 	 * might well be readers using both idx=0 and idx=1.  We therefore | ||||
| 	 * need to wait for readers to clear from both index values before | ||||
| 	 * invoking a callback. | ||||
| 	 * | ||||
| 	 * The load-acquire ensures that we see the accesses performed | ||||
| 	 * by the prior grace period. | ||||
| 	 */ | ||||
| 	idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ | ||||
| 	if (idx == SRCU_STATE_IDLE) { | ||||
| 		spin_lock_irq(&sp->gp_lock); | ||||
| 		if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { | ||||
| 			WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); | ||||
| 			spin_unlock_irq(&sp->gp_lock); | ||||
| 			mutex_unlock(&sp->srcu_gp_mutex); | ||||
| 			return; | ||||
| 		} | ||||
| 		idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); | ||||
| 		if (idx == SRCU_STATE_IDLE) | ||||
| 			srcu_gp_start(sp); | ||||
| 		spin_unlock_irq(&sp->gp_lock); | ||||
| 		if (idx != SRCU_STATE_IDLE) { | ||||
| 			mutex_unlock(&sp->srcu_gp_mutex); | ||||
| 			return; /* Someone else started the grace period. */ | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { | ||||
| 		idx = 1 ^ (sp->srcu_idx & 1); | ||||
| 		if (!try_check_zero(sp, idx, 1)) { | ||||
| 			mutex_unlock(&sp->srcu_gp_mutex); | ||||
| 			return; /* readers present, retry later. */ | ||||
| 		} | ||||
| 		srcu_flip(sp); | ||||
| 		rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2); | ||||
| 	} | ||||
| 
 | ||||
| 	if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { | ||||
| 
 | ||||
| 		/*
 | ||||
| 		 * SRCU read-side critical sections are normally short, | ||||
| 		 * so check at least twice in quick succession after a flip. | ||||
| 		 */ | ||||
| 		idx = 1 ^ (sp->srcu_idx & 1); | ||||
| 		if (!try_check_zero(sp, idx, 2)) { | ||||
| 			mutex_unlock(&sp->srcu_gp_mutex); | ||||
| 			return; /* readers present, retry later. */ | ||||
| 		} | ||||
| 		srcu_gp_end(sp);  /* Releases ->srcu_gp_mutex. */ | ||||
| 	} | ||||
| } | ||||
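| 
| /* | ||||
|  * For reference, one full grace period driven by the function above | ||||
|  * proceeds roughly as follows (a sketch that omits funnel locking and | ||||
|  * expedited handling): | ||||
|  * | ||||
|  *	SRCU_STATE_IDLE   -> srcu_gp_start() | ||||
|  *	SRCU_STATE_SCAN1  -> try_check_zero() on the pre-flip inactive | ||||
|  *			     index, then srcu_flip() | ||||
|  *	SRCU_STATE_SCAN2  -> try_check_zero() on the previously active | ||||
|  *			     index, then srcu_gp_end(), which schedules | ||||
|  *			     callback invocation and may start the next | ||||
|  *			     grace period. | ||||
|  */ | ||||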
| 
 | ||||
| /*
 | ||||
|  * Invoke a limited number of SRCU callbacks that have passed through | ||||
|  * their grace period.  If there are more to do, SRCU will reschedule | ||||
|  * the workqueue.  Note that needed memory barriers have been executed | ||||
|  * in this task's context by srcu_readers_active_idx_check(). | ||||
|  */ | ||||
| static void srcu_invoke_callbacks(struct work_struct *work) | ||||
| { | ||||
| 	bool more; | ||||
| 	struct rcu_cblist ready_cbs; | ||||
| 	struct rcu_head *rhp; | ||||
| 	struct srcu_data *sdp; | ||||
| 	struct srcu_struct *sp; | ||||
| 
 | ||||
| 	sdp = container_of(work, struct srcu_data, work.work); | ||||
| 	sp = sdp->sp; | ||||
| 	rcu_cblist_init(&ready_cbs); | ||||
| 	spin_lock_irq(&sdp->lock); | ||||
| 	smp_mb(); /* Old grace periods before callback invocation! */ | ||||
| 	rcu_segcblist_advance(&sdp->srcu_cblist, | ||||
| 			      rcu_seq_current(&sp->srcu_gp_seq)); | ||||
| 	if (sdp->srcu_cblist_invoking || | ||||
| 	    !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { | ||||
| 		spin_unlock_irq(&sdp->lock); | ||||
| 		return;  /* Someone else on the job or nothing to do. */ | ||||
| 	} | ||||
| 
 | ||||
| 	/* We are on the job!  Extract and invoke ready callbacks. */ | ||||
| 	sdp->srcu_cblist_invoking = true; | ||||
| 	rcu_segcblist_extract_done_cbs(&sdp->srcu_cblist, &ready_cbs); | ||||
| 	spin_unlock_irq(&sdp->lock); | ||||
| 	rhp = rcu_cblist_dequeue(&ready_cbs); | ||||
| 	for (; rhp != NULL; rhp = rcu_cblist_dequeue(&ready_cbs)) { | ||||
| 		local_bh_disable(); | ||||
| 		rhp->func(rhp); | ||||
| 		local_bh_enable(); | ||||
| 	} | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Update counts, accelerate new callbacks, and if needed, | ||||
| 	 * schedule another round of callback invocation. | ||||
| 	 */ | ||||
| 	spin_lock_irq(&sdp->lock); | ||||
| 	rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); | ||||
| 	(void)rcu_segcblist_accelerate(&sdp->srcu_cblist, | ||||
| 				       rcu_seq_snap(&sp->srcu_gp_seq)); | ||||
| 	sdp->srcu_cblist_invoking = false; | ||||
| 	more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); | ||||
| 	spin_unlock_irq(&sdp->lock); | ||||
| 	if (more) | ||||
| 		srcu_schedule_cbs_sdp(sdp, 0); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * Finished one round of SRCU grace period.  Start another if there are | ||||
|  * more SRCU callbacks queued, otherwise put SRCU into not-running state. | ||||
|  */ | ||||
| static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) | ||||
| { | ||||
| 	bool pushgp = true; | ||||
| 
 | ||||
| 	spin_lock_irq(&sp->gp_lock); | ||||
| 	if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { | ||||
| 		if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { | ||||
| 			/* All requests fulfilled, time to go idle. */ | ||||
| 			pushgp = false; | ||||
| 		} | ||||
| 	} else if (!rcu_seq_state(sp->srcu_gp_seq)) { | ||||
| 		/* Outstanding request and no GP.  Start one. */ | ||||
| 		srcu_gp_start(sp); | ||||
| 	} | ||||
| 	spin_unlock_irq(&sp->gp_lock); | ||||
| 
 | ||||
| 	if (pushgp) | ||||
| 		queue_delayed_work(system_power_efficient_wq, &sp->work, delay); | ||||
| } | ||||
| 
 | ||||
| /*
 | ||||
|  * This is the work-queue function that handles SRCU grace periods. | ||||
|  */ | ||||
| void process_srcu(struct work_struct *work) | ||||
| { | ||||
| 	struct srcu_struct *sp; | ||||
| 
 | ||||
| 	sp = container_of(work, struct srcu_struct, work.work); | ||||
| 
 | ||||
| 	srcu_advance_state(sp); | ||||
| 	srcu_reschedule(sp, atomic_read(&sp->srcu_exp_cnt) ? 0 : SRCU_INTERVAL); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(process_srcu); | ||||
|  | @ -79,7 +79,7 @@ EXPORT_SYMBOL(__rcu_is_watching); | |||
|  */ | ||||
| static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) | ||||
| { | ||||
| 	RCU_TRACE(reset_cpu_stall_ticks(rcp)); | ||||
| 	RCU_TRACE(reset_cpu_stall_ticks(rcp);) | ||||
| 	if (rcp->donetail != rcp->curtail) { | ||||
| 		rcp->donetail = rcp->curtail; | ||||
| 		return 1; | ||||
|  | @ -125,7 +125,7 @@ void rcu_bh_qs(void) | |||
|  */ | ||||
| void rcu_check_callbacks(int user) | ||||
| { | ||||
| 	RCU_TRACE(check_cpu_stalls()); | ||||
| 	RCU_TRACE(check_cpu_stalls();) | ||||
| 	if (user) | ||||
| 		rcu_sched_qs(); | ||||
| 	else if (!in_softirq()) | ||||
|  | @ -143,7 +143,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
| 	const char *rn = NULL; | ||||
| 	struct rcu_head *next, *list; | ||||
| 	unsigned long flags; | ||||
| 	RCU_TRACE(int cb_count = 0); | ||||
| 	RCU_TRACE(int cb_count = 0;) | ||||
| 
 | ||||
| 	/* Move the ready-to-invoke callbacks to a local list. */ | ||||
| 	local_irq_save(flags); | ||||
|  | @ -152,7 +152,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
| 		local_irq_restore(flags); | ||||
| 		return; | ||||
| 	} | ||||
| 	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1)); | ||||
| 	RCU_TRACE(trace_rcu_batch_start(rcp->name, 0, rcp->qlen, -1);) | ||||
| 	list = rcp->rcucblist; | ||||
| 	rcp->rcucblist = *rcp->donetail; | ||||
| 	*rcp->donetail = NULL; | ||||
|  | @ -162,7 +162,7 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
| 	local_irq_restore(flags); | ||||
| 
 | ||||
| 	/* Invoke the callbacks on the local list. */ | ||||
| 	RCU_TRACE(rn = rcp->name); | ||||
| 	RCU_TRACE(rn = rcp->name;) | ||||
| 	while (list) { | ||||
| 		next = list->next; | ||||
| 		prefetch(next); | ||||
|  | @ -171,9 +171,9 @@ static void __rcu_process_callbacks(struct rcu_ctrlblk *rcp) | |||
| 		__rcu_reclaim(rn, list); | ||||
| 		local_bh_enable(); | ||||
| 		list = next; | ||||
| 		RCU_TRACE(cb_count++); | ||||
| 		RCU_TRACE(cb_count++;) | ||||
| 	} | ||||
| 	RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count)); | ||||
| 	RCU_TRACE(rcu_trace_sub_qlen(rcp, cb_count);) | ||||
| 	RCU_TRACE(trace_rcu_batch_end(rcp->name, | ||||
| 				      cb_count, 0, need_resched(), | ||||
| 				      is_idle_task(current), | ||||
|  | @ -221,7 +221,7 @@ static void __call_rcu(struct rcu_head *head, | |||
| 	local_irq_save(flags); | ||||
| 	*rcp->curtail = head; | ||||
| 	rcp->curtail = &head->next; | ||||
| 	RCU_TRACE(rcp->qlen++); | ||||
| 	RCU_TRACE(rcp->qlen++;) | ||||
| 	local_irq_restore(flags); | ||||
| 
 | ||||
| 	if (unlikely(is_idle_task(current))) { | ||||
|  | @ -254,8 +254,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh); | |||
| void __init rcu_init(void) | ||||
| { | ||||
| 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); | ||||
| 	RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk)); | ||||
| 	RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk)); | ||||
| 	RCU_TRACE(reset_cpu_stall_ticks(&rcu_sched_ctrlblk);) | ||||
| 	RCU_TRACE(reset_cpu_stall_ticks(&rcu_bh_ctrlblk);) | ||||
| 
 | ||||
| 	rcu_early_boot_tests(); | ||||
| } | ||||
|  |  | |||
|  | @ -52,7 +52,7 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = { | |||
| 	RCU_TRACE(.name = "rcu_bh") | ||||
| }; | ||||
| 
 | ||||
| #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||||
| #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) | ||||
| #include <linux/kernel_stat.h> | ||||
| 
 | ||||
| int rcu_scheduler_active __read_mostly; | ||||
|  | @ -65,15 +65,16 @@ EXPORT_SYMBOL_GPL(rcu_scheduler_active); | |||
|  * to RCU_SCHEDULER_RUNNING, skipping the RCU_SCHEDULER_INIT stage. | ||||
|  * The reason for this is that Tiny RCU does not need kthreads, so does | ||||
|  * not have to care about the fact that the scheduler is half-initialized | ||||
|  * at a certain phase of the boot process. | ||||
|  * at a certain phase of the boot process.  Unless SRCU is in the mix. | ||||
|  */ | ||||
| void __init rcu_scheduler_starting(void) | ||||
| { | ||||
| 	WARN_ON(nr_context_switches() > 0); | ||||
| 	rcu_scheduler_active = RCU_SCHEDULER_RUNNING; | ||||
| 	rcu_scheduler_active = IS_ENABLED(CONFIG_SRCU) | ||||
| 		? RCU_SCHEDULER_INIT : RCU_SCHEDULER_RUNNING; | ||||
| } | ||||
| 
 | ||||
| #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ | ||||
| #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_TRACE | ||||
| 
 | ||||
|  | @ -162,8 +163,8 @@ static void reset_cpu_stall_ticks(struct rcu_ctrlblk *rcp) | |||
| 
 | ||||
| static void check_cpu_stalls(void) | ||||
| { | ||||
| 	RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk)); | ||||
| 	RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk)); | ||||
| 	RCU_TRACE(check_cpu_stall(&rcu_bh_ctrlblk);) | ||||
| 	RCU_TRACE(check_cpu_stall(&rcu_sched_ctrlblk);) | ||||
| } | ||||
| 
 | ||||
| #endif /* #ifdef CONFIG_RCU_TRACE */ | ||||
|  |  | |||
										
											
File diff suppressed because it is too large
							|  | @ -30,80 +30,8 @@ | |||
| #include <linux/seqlock.h> | ||||
| #include <linux/swait.h> | ||||
| #include <linux/stop_machine.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and | ||||
|  * CONFIG_RCU_FANOUT_LEAF. | ||||
|  * In theory, it should be possible to add more levels straightforwardly. | ||||
|  * In practice, this did work well going from three levels to four. | ||||
|  * Of course, your mileage may vary. | ||||
|  */ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_FANOUT | ||||
| #define RCU_FANOUT CONFIG_RCU_FANOUT | ||||
| #else /* #ifdef CONFIG_RCU_FANOUT */ | ||||
| # ifdef CONFIG_64BIT | ||||
| # define RCU_FANOUT 64 | ||||
| # else | ||||
| # define RCU_FANOUT 32 | ||||
| # endif | ||||
| #endif /* #else #ifdef CONFIG_RCU_FANOUT */ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_FANOUT_LEAF | ||||
| #define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF | ||||
| #else /* #ifdef CONFIG_RCU_FANOUT_LEAF */ | ||||
| # ifdef CONFIG_64BIT | ||||
| # define RCU_FANOUT_LEAF 64 | ||||
| # else | ||||
| # define RCU_FANOUT_LEAF 32 | ||||
| # endif | ||||
| #endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */ | ||||
| 
 | ||||
| #define RCU_FANOUT_1	      (RCU_FANOUT_LEAF) | ||||
| #define RCU_FANOUT_2	      (RCU_FANOUT_1 * RCU_FANOUT) | ||||
| #define RCU_FANOUT_3	      (RCU_FANOUT_2 * RCU_FANOUT) | ||||
| #define RCU_FANOUT_4	      (RCU_FANOUT_3 * RCU_FANOUT) | ||||
| 
 | ||||
| #if NR_CPUS <= RCU_FANOUT_1 | ||||
| #  define RCU_NUM_LVLS	      1 | ||||
| #  define NUM_RCU_LVL_0	      1 | ||||
| #  define NUM_RCU_NODES	      NUM_RCU_LVL_0 | ||||
| #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0 } | ||||
| #  define RCU_NODE_NAME_INIT  { "rcu_node_0" } | ||||
| #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0" } | ||||
| #elif NR_CPUS <= RCU_FANOUT_2 | ||||
| #  define RCU_NUM_LVLS	      2 | ||||
| #  define NUM_RCU_LVL_0	      1 | ||||
| #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | ||||
| #  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1) | ||||
| #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1 } | ||||
| #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1" } | ||||
| #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1" } | ||||
| #elif NR_CPUS <= RCU_FANOUT_3 | ||||
| #  define RCU_NUM_LVLS	      3 | ||||
| #  define NUM_RCU_LVL_0	      1 | ||||
| #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) | ||||
| #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | ||||
| #  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2) | ||||
| #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 } | ||||
| #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2" } | ||||
| #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" } | ||||
| #elif NR_CPUS <= RCU_FANOUT_4 | ||||
| #  define RCU_NUM_LVLS	      4 | ||||
| #  define NUM_RCU_LVL_0	      1 | ||||
| #  define NUM_RCU_LVL_1	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) | ||||
| #  define NUM_RCU_LVL_2	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) | ||||
| #  define NUM_RCU_LVL_3	      DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) | ||||
| #  define NUM_RCU_NODES	      (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) | ||||
| #  define NUM_RCU_LVL_INIT    { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 } | ||||
| #  define RCU_NODE_NAME_INIT  { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" } | ||||
| #  define RCU_FQS_NAME_INIT   { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" } | ||||
| #else | ||||
| # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" | ||||
| #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ | ||||
| 
 | ||||
| extern int rcu_num_lvls; | ||||
| extern int rcu_num_nodes; | ||||
| #include <linux/rcu_segcblist.h> | ||||
| #include <linux/rcu_node_tree.h> | ||||
| 
 | ||||
| /*
 | ||||
|  * Dynticks per-CPU state. | ||||
|  | @ -113,6 +41,9 @@ struct rcu_dynticks { | |||
| 				    /* Process level is worth LLONG_MAX/2. */ | ||||
| 	int dynticks_nmi_nesting;   /* Track NMI nesting level. */ | ||||
| 	atomic_t dynticks;	    /* Even value for idle, else odd. */ | ||||
| 	bool rcu_need_heavy_qs;     /* GP old, need heavy quiescent state. */ | ||||
| 	unsigned long rcu_qs_ctr;   /* Light universal quiescent state ctr. */ | ||||
| 	bool rcu_urgent_qs;	    /* GP old need light quiescent state. */ | ||||
| #ifdef CONFIG_NO_HZ_FULL_SYSIDLE | ||||
| 	long long dynticks_idle_nesting; | ||||
| 				    /* irq/process nesting level from idle. */ | ||||
|  | @ -261,41 +192,6 @@ struct rcu_node { | |||
|  */ | ||||
| #define leaf_node_cpu_bit(rnp, cpu) (1UL << ((cpu) - (rnp)->grplo)) | ||||
| 
 | ||||
| /*
 | ||||
|  * Do a full breadth-first scan of the rcu_node structures for the | ||||
|  * specified rcu_state structure. | ||||
|  */ | ||||
| #define rcu_for_each_node_breadth_first(rsp, rnp) \ | ||||
| 	for ((rnp) = &(rsp)->node[0]; \ | ||||
| 	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) | ||||
| 
 | ||||
| /*
 | ||||
|  * Do a breadth-first scan of the non-leaf rcu_node structures for the | ||||
|  * specified rcu_state structure.  Note that if there is a singleton | ||||
|  * rcu_node tree with but one rcu_node structure, this loop is a no-op. | ||||
|  */ | ||||
| #define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \ | ||||
| 	for ((rnp) = &(rsp)->node[0]; \ | ||||
| 	     (rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++) | ||||
| 
 | ||||
| /*
 | ||||
|  * Scan the leaves of the rcu_node hierarchy for the specified rcu_state | ||||
|  * structure.  Note that if there is a singleton rcu_node tree with but | ||||
|  * one rcu_node structure, this loop -will- visit the rcu_node structure. | ||||
|  * It is still a leaf node, even if it is also the root node. | ||||
|  */ | ||||
| #define rcu_for_each_leaf_node(rsp, rnp) \ | ||||
| 	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \ | ||||
| 	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++) | ||||
| 
 | ||||
| /*
 | ||||
|  * Iterate over all possible CPUs in a leaf RCU node. | ||||
|  */ | ||||
| #define for_each_leaf_node_possible_cpu(rnp, cpu) \ | ||||
| 	for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \ | ||||
| 	     cpu <= rnp->grphi; \ | ||||
| 	     cpu = cpumask_next((cpu), cpu_possible_mask)) | ||||
| 
 | ||||
| /*
 | ||||
|  * Union to allow "aggregate OR" operation on the need for a quiescent | ||||
|  * state by the normal and expedited grace periods. | ||||
|  | @ -336,34 +232,9 @@ struct rcu_data { | |||
| 					/* period it is aware of. */ | ||||
| 
 | ||||
| 	/* 2) batch handling */ | ||||
| 	/*
 | ||||
| 	 * If nxtlist is not NULL, it is partitioned as follows. | ||||
| 	 * Any of the partitions might be empty, in which case the | ||||
| 	 * pointer to that partition will be equal to the pointer for | ||||
| 	 * the following partition.  When the list is empty, all of | ||||
| 	 * the nxttail elements point to the ->nxtlist pointer itself, | ||||
| 	 * which in that case is NULL. | ||||
| 	 * | ||||
| 	 * [nxtlist, *nxttail[RCU_DONE_TAIL]): | ||||
| 	 *	Entries that batch # <= ->completed | ||||
| 	 *	The grace period for these entries has completed, and | ||||
| 	 *	the other grace-period-completed entries may be moved | ||||
| 	 *	here temporarily in rcu_process_callbacks(). | ||||
| 	 * [*nxttail[RCU_DONE_TAIL], *nxttail[RCU_WAIT_TAIL]): | ||||
| 	 *	Entries that batch # <= ->completed - 1: waiting for current GP | ||||
| 	 * [*nxttail[RCU_WAIT_TAIL], *nxttail[RCU_NEXT_READY_TAIL]): | ||||
| 	 *	Entries known to have arrived before current GP ended | ||||
| 	 * [*nxttail[RCU_NEXT_READY_TAIL], *nxttail[RCU_NEXT_TAIL]): | ||||
| 	 *	Entries that might have arrived after current GP ended | ||||
| 	 *	Note that the value of *nxttail[RCU_NEXT_TAIL] will | ||||
| 	 *	always be NULL, as this is the end of the list. | ||||
| 	 */ | ||||
| 	struct rcu_head *nxtlist; | ||||
| 	struct rcu_head **nxttail[RCU_NEXT_SIZE]; | ||||
| 	unsigned long	nxtcompleted[RCU_NEXT_SIZE]; | ||||
| 					/* grace periods for sublists. */ | ||||
| 	long		qlen_lazy;	/* # of lazy queued callbacks */ | ||||
| 	long		qlen;		/* # of queued callbacks, incl lazy */ | ||||
| 	struct rcu_segcblist cblist;	/* Segmented callback list, with */ | ||||
| 					/* different callbacks waiting for */ | ||||
| 					/* different grace periods. */ | ||||
| 	long		qlen_last_fqs_check; | ||||
| 					/* qlen at last check for QS forcing */ | ||||
| 	unsigned long	n_cbs_invoked;	/* count of RCU cbs invoked. */ | ||||
|  | @ -482,7 +353,6 @@ struct rcu_state { | |||
| 	struct rcu_node *level[RCU_NUM_LVLS + 1]; | ||||
| 						/* Hierarchy levels (+1 to */ | ||||
| 						/*  shut bogus gcc warning) */ | ||||
| 	u8 flavor_mask;				/* bit in flavor mask. */ | ||||
| 	struct rcu_data __percpu *rda;		/* pointer of percu rcu_data. */ | ||||
| 	call_rcu_func_t call;			/* call_rcu() flavor. */ | ||||
| 	int ncpus;				/* # CPUs seen so far. */ | ||||
|  | @ -502,14 +372,11 @@ struct rcu_state { | |||
| 
 | ||||
| 	raw_spinlock_t orphan_lock ____cacheline_internodealigned_in_smp; | ||||
| 						/* Protect following fields. */ | ||||
| 	struct rcu_head *orphan_nxtlist;	/* Orphaned callbacks that */ | ||||
| 	struct rcu_cblist orphan_pend;		/* Orphaned callbacks that */ | ||||
| 						/*  need a grace period. */ | ||||
| 	struct rcu_head **orphan_nxttail;	/* Tail of above. */ | ||||
| 	struct rcu_head *orphan_donelist;	/* Orphaned callbacks that */ | ||||
| 	struct rcu_cblist orphan_done;		/* Orphaned callbacks that */ | ||||
| 						/*  are ready to invoke. */ | ||||
| 	struct rcu_head **orphan_donetail;	/* Tail of above. */ | ||||
| 	long qlen_lazy;				/* Number of lazy callbacks. */ | ||||
| 	long qlen;				/* Total number of callbacks. */ | ||||
| 						/* (Contains counts.) */ | ||||
| 	/* End of fields guarded by orphan_lock. */ | ||||
| 
 | ||||
| 	struct mutex barrier_mutex;		/* Guards barrier fields. */ | ||||
|  | @ -596,6 +463,7 @@ extern struct rcu_state rcu_preempt_state; | |||
| #endif /* #ifdef CONFIG_PREEMPT_RCU */ | ||||
| 
 | ||||
| int rcu_dynticks_snap(struct rcu_dynticks *rdtp); | ||||
| bool rcu_eqs_special_set(int cpu); | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status); | ||||
|  | @ -673,6 +541,14 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp); | |||
| static void rcu_dynticks_task_enter(void); | ||||
| static void rcu_dynticks_task_exit(void); | ||||
| 
 | ||||
| #ifdef CONFIG_SRCU | ||||
| void srcu_online_cpu(unsigned int cpu); | ||||
| void srcu_offline_cpu(unsigned int cpu); | ||||
| #else /* #ifdef CONFIG_SRCU */ | ||||
| void srcu_online_cpu(unsigned int cpu) { } | ||||
| void srcu_offline_cpu(unsigned int cpu) { } | ||||
| #endif /* #else #ifdef CONFIG_SRCU */ | ||||
| 
 | ||||
| #endif /* #ifndef RCU_TREE_NONCORE */ | ||||
| 
 | ||||
| #ifdef CONFIG_RCU_TRACE | ||||
|  |  | |||
|  | @ -292,7 +292,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s) | |||
| 			trace_rcu_exp_funnel_lock(rsp->name, rnp->level, | ||||
| 						  rnp->grplo, rnp->grphi, | ||||
| 						  TPS("wait")); | ||||
| 			wait_event(rnp->exp_wq[(s >> 1) & 0x3], | ||||
| 			wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], | ||||
| 				   sync_exp_work_done(rsp, | ||||
| 						      &rdp->exp_workdone2, s)); | ||||
| 			return true; | ||||
|  | @ -331,6 +331,8 @@ static void sync_sched_exp_handler(void *data) | |||
| 		return; | ||||
| 	} | ||||
| 	__this_cpu_write(rcu_sched_data.cpu_no_qs.b.exp, true); | ||||
| 	/* Store .exp before .rcu_urgent_qs. */ | ||||
| 	smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); | ||||
| 	resched_cpu(smp_processor_id()); | ||||
| } | ||||
| 
 | ||||
|  | @ -531,7 +533,8 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s) | |||
| 				rnp->exp_seq_rq = s; | ||||
| 			spin_unlock(&rnp->exp_lock); | ||||
| 		} | ||||
| 		wake_up_all(&rnp->exp_wq[(rsp->expedited_sequence >> 1) & 0x3]); | ||||
| 		smp_mb(); /* All above changes before wakeup. */ | ||||
| 		wake_up_all(&rnp->exp_wq[rcu_seq_ctr(rsp->expedited_sequence) & 0x3]); | ||||
| 	} | ||||
| 	trace_rcu_exp_grace_period(rsp->name, s, TPS("endwake")); | ||||
| 	mutex_unlock(&rsp->exp_wake_mutex); | ||||
|  | @ -609,9 +612,9 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp, | |||
| 	/* Wait for expedited grace period to complete. */ | ||||
| 	rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id()); | ||||
| 	rnp = rcu_get_root(rsp); | ||||
| 	wait_event(rnp->exp_wq[(s >> 1) & 0x3], | ||||
| 		   sync_exp_work_done(rsp, | ||||
| 				      &rdp->exp_workdone0, s)); | ||||
| 	wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3], | ||||
| 		   sync_exp_work_done(rsp, &rdp->exp_workdone0, s)); | ||||
| 	smp_mb(); /* Workqueue actions happen before return. */ | ||||
| 
 | ||||
| 	/* Let the next expedited grace period start. */ | ||||
| 	mutex_unlock(&rsp->exp_mutex); | ||||
|  | @ -735,15 +738,3 @@ void synchronize_rcu_expedited(void) | |||
| EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); | ||||
| 
 | ||||
| #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Switch to run-time mode once Tree RCU has fully initialized. | ||||
|  */ | ||||
| static int __init rcu_exp_runtime_mode(void) | ||||
| { | ||||
| 	rcu_test_sync_prims(); | ||||
| 	rcu_scheduler_active = RCU_SCHEDULER_RUNNING; | ||||
| 	rcu_test_sync_prims(); | ||||
| 	return 0; | ||||
| } | ||||
| core_initcall(rcu_exp_runtime_mode); | ||||
|  |  | |||
|  | @ -1350,10 +1350,10 @@ static bool __maybe_unused rcu_try_advance_all_cbs(void) | |||
| 		 */ | ||||
| 		if ((rdp->completed != rnp->completed || | ||||
| 		     unlikely(READ_ONCE(rdp->gpwrap))) && | ||||
| 		    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) | ||||
| 		    rcu_segcblist_pend_cbs(&rdp->cblist)) | ||||
| 			note_gp_changes(rsp, rdp); | ||||
| 
 | ||||
| 		if (cpu_has_callbacks_ready_to_invoke(rdp)) | ||||
| 		if (rcu_segcblist_ready_cbs(&rdp->cblist)) | ||||
| 			cbs_ready = true; | ||||
| 	} | ||||
| 	return cbs_ready; | ||||
|  | @ -1461,7 +1461,7 @@ static void rcu_prepare_for_idle(void) | |||
| 	rdtp->last_accelerate = jiffies; | ||||
| 	for_each_rcu_flavor(rsp) { | ||||
| 		rdp = this_cpu_ptr(rsp->rda); | ||||
| 		if (!*rdp->nxttail[RCU_DONE_TAIL]) | ||||
| 		if (rcu_segcblist_pend_cbs(&rdp->cblist)) | ||||
| 			continue; | ||||
| 		rnp = rdp->mynode; | ||||
| 		raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */ | ||||
|  | @ -1529,7 +1529,7 @@ static void rcu_oom_notify_cpu(void *unused) | |||
| 
 | ||||
| 	for_each_rcu_flavor(rsp) { | ||||
| 		rdp = raw_cpu_ptr(rsp->rda); | ||||
| 		if (rdp->qlen_lazy != 0) { | ||||
| 		if (rcu_segcblist_n_lazy_cbs(&rdp->cblist)) { | ||||
| 			atomic_inc(&oom_callback_count); | ||||
| 			rsp->call(&rdp->oom_head, rcu_oom_callback); | ||||
| 		} | ||||
|  | @ -1709,7 +1709,7 @@ __setup("rcu_nocbs=", rcu_nocb_setup); | |||
| 
 | ||||
| static int __init parse_rcu_nocb_poll(char *arg) | ||||
| { | ||||
| 	rcu_nocb_poll = 1; | ||||
| 	rcu_nocb_poll = true; | ||||
| 	return 0; | ||||
| } | ||||
| early_param("rcu_nocb_poll", parse_rcu_nocb_poll); | ||||
|  | @ -1860,7 +1860,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, | |||
| 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | ||||
| 					    TPS("WakeEmpty")); | ||||
| 		} else { | ||||
| 			rdp->nocb_defer_wakeup = RCU_NOGP_WAKE; | ||||
| 			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE); | ||||
| 			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ | ||||
| 			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); | ||||
| 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | ||||
| 					    TPS("WakeEmptyIsDeferred")); | ||||
| 		} | ||||
|  | @ -1872,7 +1874,9 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, | |||
| 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | ||||
| 					    TPS("WakeOvf")); | ||||
| 		} else { | ||||
| 			rdp->nocb_defer_wakeup = RCU_NOGP_WAKE_FORCE; | ||||
| 			WRITE_ONCE(rdp->nocb_defer_wakeup, RCU_NOGP_WAKE_FORCE); | ||||
| 			/* Store ->nocb_defer_wakeup before ->rcu_urgent_qs. */ | ||||
| 			smp_store_release(this_cpu_ptr(&rcu_dynticks.rcu_urgent_qs), true); | ||||
| 			trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu, | ||||
| 					    TPS("WakeOvfIsDeferred")); | ||||
| 		} | ||||
|  | @ -1930,30 +1934,26 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp, | |||
| 						     struct rcu_data *rdp, | ||||
| 						     unsigned long flags) | ||||
| { | ||||
| 	long ql = rsp->qlen; | ||||
| 	long qll = rsp->qlen_lazy; | ||||
| 	long ql = rcu_cblist_n_cbs(&rsp->orphan_done); | ||||
| 	long qll = rcu_cblist_n_lazy_cbs(&rsp->orphan_done); | ||||
| 
 | ||||
| 	/* If this is not a no-CBs CPU, tell the caller to do it the old way. */ | ||||
| 	if (!rcu_is_nocb_cpu(smp_processor_id())) | ||||
| 		return false; | ||||
| 	rsp->qlen = 0; | ||||
| 	rsp->qlen_lazy = 0; | ||||
| 
 | ||||
| 	/* First, enqueue the donelist, if any.  This preserves CB ordering. */ | ||||
| 	if (rsp->orphan_donelist != NULL) { | ||||
| 		__call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist, | ||||
| 					rsp->orphan_donetail, ql, qll, flags); | ||||
| 		ql = qll = 0; | ||||
| 		rsp->orphan_donelist = NULL; | ||||
| 		rsp->orphan_donetail = &rsp->orphan_donelist; | ||||
| 	if (!rcu_cblist_empty(&rsp->orphan_done)) { | ||||
| 		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_done), | ||||
| 					rcu_cblist_tail(&rsp->orphan_done), | ||||
| 					ql, qll, flags); | ||||
| 	} | ||||
| 	if (rsp->orphan_nxtlist != NULL) { | ||||
| 		__call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist, | ||||
| 					rsp->orphan_nxttail, ql, qll, flags); | ||||
| 		ql = qll = 0; | ||||
| 		rsp->orphan_nxtlist = NULL; | ||||
| 		rsp->orphan_nxttail = &rsp->orphan_nxtlist; | ||||
| 	if (!rcu_cblist_empty(&rsp->orphan_pend)) { | ||||
| 		__call_rcu_nocb_enqueue(rdp, rcu_cblist_head(&rsp->orphan_pend), | ||||
| 					rcu_cblist_tail(&rsp->orphan_pend), | ||||
| 					ql, qll, flags); | ||||
| 	} | ||||
| 	rcu_cblist_init(&rsp->orphan_done); | ||||
| 	rcu_cblist_init(&rsp->orphan_pend); | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
|  | @ -2395,16 +2395,16 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) | |||
| 		return false; | ||||
| 
 | ||||
| 	/* If there are early-boot callbacks, move them to nocb lists. */ | ||||
| 	if (rdp->nxtlist) { | ||||
| 		rdp->nocb_head = rdp->nxtlist; | ||||
| 		rdp->nocb_tail = rdp->nxttail[RCU_NEXT_TAIL]; | ||||
| 		atomic_long_set(&rdp->nocb_q_count, rdp->qlen); | ||||
| 		atomic_long_set(&rdp->nocb_q_count_lazy, rdp->qlen_lazy); | ||||
| 		rdp->nxtlist = NULL; | ||||
| 		rdp->qlen = 0; | ||||
| 		rdp->qlen_lazy = 0; | ||||
| 	if (!rcu_segcblist_empty(&rdp->cblist)) { | ||||
| 		rdp->nocb_head = rcu_segcblist_head(&rdp->cblist); | ||||
| 		rdp->nocb_tail = rcu_segcblist_tail(&rdp->cblist); | ||||
| 		atomic_long_set(&rdp->nocb_q_count, | ||||
| 				rcu_segcblist_n_cbs(&rdp->cblist)); | ||||
| 		atomic_long_set(&rdp->nocb_q_count_lazy, | ||||
| 				rcu_segcblist_n_lazy_cbs(&rdp->cblist)); | ||||
| 		rcu_segcblist_init(&rdp->cblist); | ||||
| 	} | ||||
| 	rdp->nxttail[RCU_NEXT_TAIL] = NULL; | ||||
| 	rcu_segcblist_disable(&rdp->cblist); | ||||
| 	return true; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -41,11 +41,11 @@ | |||
| #include <linux/mutex.h> | ||||
| #include <linux/debugfs.h> | ||||
| #include <linux/seq_file.h> | ||||
| #include <linux/prefetch.h> | ||||
| 
 | ||||
| #define RCU_TREE_NONCORE | ||||
| #include "tree.h" | ||||
| 
 | ||||
| DECLARE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr); | ||||
| #include "rcu.h" | ||||
| 
 | ||||
| static int r_open(struct inode *inode, struct file *file, | ||||
| 					const struct seq_operations *op) | ||||
|  | @ -121,7 +121,7 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
| 		   cpu_is_offline(rdp->cpu) ? '!' : ' ', | ||||
| 		   ulong2long(rdp->completed), ulong2long(rdp->gpnum), | ||||
| 		   rdp->cpu_no_qs.b.norm, | ||||
| 		   rdp->rcu_qs_ctr_snap == per_cpu(rcu_qs_ctr, rdp->cpu), | ||||
| 		   rdp->rcu_qs_ctr_snap == per_cpu(rdp->dynticks->rcu_qs_ctr, rdp->cpu), | ||||
| 		   rdp->core_needs_qs); | ||||
| 	seq_printf(m, " dt=%d/%llx/%d df=%lu", | ||||
| 		   rcu_dynticks_snap(rdp->dynticks), | ||||
|  | @ -130,17 +130,15 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp) | |||
| 		   rdp->dynticks_fqs); | ||||
| 	seq_printf(m, " of=%lu", rdp->offline_fqs); | ||||
| 	rcu_nocb_q_lengths(rdp, &ql, &qll); | ||||
| 	qll += rdp->qlen_lazy; | ||||
| 	ql += rdp->qlen; | ||||
| 	qll += rcu_segcblist_n_lazy_cbs(&rdp->cblist); | ||||
| 	ql += rcu_segcblist_n_cbs(&rdp->cblist); | ||||
| 	seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c", | ||||
| 		   qll, ql, | ||||
| 		   ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] != | ||||
| 			rdp->nxttail[RCU_NEXT_TAIL]], | ||||
| 		   ".R"[rdp->nxttail[RCU_WAIT_TAIL] != | ||||
| 			rdp->nxttail[RCU_NEXT_READY_TAIL]], | ||||
| 		   ".W"[rdp->nxttail[RCU_DONE_TAIL] != | ||||
| 			rdp->nxttail[RCU_WAIT_TAIL]], | ||||
| 		   ".D"[&rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL]]); | ||||
| 		   ".N"[!rcu_segcblist_segempty(&rdp->cblist, RCU_NEXT_TAIL)], | ||||
| 		   ".R"[!rcu_segcblist_segempty(&rdp->cblist, | ||||
| 						RCU_NEXT_READY_TAIL)], | ||||
| 		   ".W"[!rcu_segcblist_segempty(&rdp->cblist, RCU_WAIT_TAIL)], | ||||
| 		   ".D"[!rcu_segcblist_segempty(&rdp->cblist, RCU_DONE_TAIL)]); | ||||
| #ifdef CONFIG_RCU_BOOST | ||||
| 	seq_printf(m, " kt=%d/%c ktl=%x", | ||||
| 		   per_cpu(rcu_cpu_has_work, rdp->cpu), | ||||
|  | @ -278,7 +276,9 @@ static void print_one_rcu_state(struct seq_file *m, struct rcu_state *rsp) | |||
| 	seq_printf(m, "nfqs=%lu/nfqsng=%lu(%lu) fqlh=%lu oqlen=%ld/%ld\n", | ||||
| 		   rsp->n_force_qs, rsp->n_force_qs_ngp, | ||||
| 		   rsp->n_force_qs - rsp->n_force_qs_ngp, | ||||
| 		   READ_ONCE(rsp->n_force_qs_lh), rsp->qlen_lazy, rsp->qlen); | ||||
| 		   READ_ONCE(rsp->n_force_qs_lh), | ||||
| 		   rcu_cblist_n_lazy_cbs(&rsp->orphan_done), | ||||
| 		   rcu_cblist_n_cbs(&rsp->orphan_done)); | ||||
| 	for (rnp = &rsp->node[0]; rnp - &rsp->node[0] < rcu_num_nodes; rnp++) { | ||||
| 		if (rnp->level != level) { | ||||
| 			seq_puts(m, "\n"); | ||||
|  |  | |||
|  | @ -124,7 +124,7 @@ EXPORT_SYMBOL(rcu_read_lock_sched_held); | |||
|  * non-expedited counterparts?  Intended for use within RCU.  Note | ||||
|  * that if the user specifies both rcu_expedited and rcu_normal, then | ||||
|  * rcu_normal wins.  (Except during the boot-time window from | ||||
|  * when the first task is spawned until the rcu_exp_runtime_mode() | ||||
|  * when the first task is spawned until the rcu_set_runtime_mode() | ||||
|  * core_initcall() is invoked, at which point everything is expedited.) | ||||
|  */ | ||||
| bool rcu_gp_is_normal(void) | ||||
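The comment above spells out the precedence between the rcupdate.rcu_normal and rcupdate.rcu_expedited knobs: rcu_normal wins whenever both are set, except in the early-boot window where everything is expedited. A minimal sketch of how a caller might consult the two predicates follows; wait_normal_gp() and wait_expedited_gp() are hypothetical stand-ins, not kernel functions.

/*
 * Sketch only -- not the kernel's implementation.  wait_normal_gp() and
 * wait_expedited_gp() are hypothetical placeholders for the real normal
 * and expedited grace-period paths.
 */
static void wait_normal_gp(void);
static void wait_expedited_gp(void);

static void example_synchronize(void)
{
	if (rcu_gp_is_normal()) {
		wait_normal_gp();	/* rcu_normal overrides rcu_expedited */
		return;
	}
	if (rcu_gp_is_expedited()) {
		wait_expedited_gp();	/* rcu_expedited set, or early-boot window */
		return;
	}
	wait_normal_gp();		/* neither knob set */
}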
|  | @ -190,6 +190,39 @@ void rcu_end_inkernel_boot(void) | |||
| 
 | ||||
| #endif /* #ifndef CONFIG_TINY_RCU */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Test each non-SRCU synchronous grace-period wait API.  This is | ||||
|  * useful just after a change in mode for these primitives, and | ||||
|  * during early boot. | ||||
|  */ | ||||
| void rcu_test_sync_prims(void) | ||||
| { | ||||
| 	if (!IS_ENABLED(CONFIG_PROVE_RCU)) | ||||
| 		return; | ||||
| 	synchronize_rcu(); | ||||
| 	synchronize_rcu_bh(); | ||||
| 	synchronize_sched(); | ||||
| 	synchronize_rcu_expedited(); | ||||
| 	synchronize_rcu_bh_expedited(); | ||||
| 	synchronize_sched_expedited(); | ||||
| } | ||||
| 
 | ||||
| #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) | ||||
| 
 | ||||
| /*
 | ||||
|  * Switch to run-time mode once RCU has fully initialized. | ||||
|  */ | ||||
| static int __init rcu_set_runtime_mode(void) | ||||
| { | ||||
| 	rcu_test_sync_prims(); | ||||
| 	rcu_scheduler_active = RCU_SCHEDULER_RUNNING; | ||||
| 	rcu_test_sync_prims(); | ||||
| 	return 0; | ||||
| } | ||||
| core_initcall(rcu_set_runtime_mode); | ||||
| 
 | ||||
| #endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */ | ||||
| 
 | ||||
| #ifdef CONFIG_PREEMPT_RCU | ||||
| 
 | ||||
| /*
 | ||||
|  | @ -632,6 +665,7 @@ static void check_holdout_task(struct task_struct *t, | |||
| 		put_task_struct(t); | ||||
| 		return; | ||||
| 	} | ||||
| 	rcu_request_urgent_qs_task(t); | ||||
| 	if (!needreport) | ||||
| 		return; | ||||
| 	if (*firstreport) { | ||||
|  | @ -817,23 +851,6 @@ static void rcu_spawn_tasks_kthread(void) | |||
| 
 | ||||
| #endif /* #ifdef CONFIG_TASKS_RCU */ | ||||
| 
 | ||||
| /*
 | ||||
|  * Test each non-SRCU synchronous grace-period wait API.  This is | ||||
|  * useful just after a change in mode for these primitives, and | ||||
|  * during early boot. | ||||
|  */ | ||||
| void rcu_test_sync_prims(void) | ||||
| { | ||||
| 	if (!IS_ENABLED(CONFIG_PROVE_RCU)) | ||||
| 		return; | ||||
| 	synchronize_rcu(); | ||||
| 	synchronize_rcu_bh(); | ||||
| 	synchronize_sched(); | ||||
| 	synchronize_rcu_expedited(); | ||||
| 	synchronize_rcu_bh_expedited(); | ||||
| 	synchronize_sched_expedited(); | ||||
| } | ||||
| 
 | ||||
| #ifdef CONFIG_PROVE_RCU | ||||
| 
 | ||||
| /*
 | ||||
|  |  | |||
|  | @ -3378,7 +3378,7 @@ static void __sched notrace __schedule(bool preempt) | |||
| 		hrtick_clear(rq); | ||||
| 
 | ||||
| 	local_irq_disable(); | ||||
| 	rcu_note_context_switch(); | ||||
| 	rcu_note_context_switch(preempt); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Make sure that signal_pending_state()->signal_pending() below | ||||
|  |  | |||
|  | @ -1237,7 +1237,7 @@ struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, | |||
| 		} | ||||
| 		/*
 | ||||
| 		 * This sighand can be already freed and even reused, but | ||||
| 		 * we rely on SLAB_DESTROY_BY_RCU and sighand_ctor() which | ||||
| 		 * we rely on SLAB_TYPESAFE_BY_RCU and sighand_ctor() which | ||||
| 		 * initializes ->siglock: this slab can't go away, it has | ||||
| 		 * the same object type, ->siglock can't be reinitialized. | ||||
| 		 * | ||||
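This SLAB_TYPESAFE_BY_RCU comment, like the llc and nf_conntrack hunks further down, relies on one lookup discipline: under rcu_read_lock() the memory keeps its type, but the object may have been freed and reused, so a reader must take a reference with atomic_inc_not_zero() and then re-validate the object's identity. A minimal sketch of that discipline, assuming hypothetical find_candidate(), key_matches() and put_obj() helpers and an illustrative refcnt field:

/*
 * Sketch only -- not lifted from the kernel.  find_candidate(),
 * key_matches(), put_obj() and the refcnt field are illustrative.
 */
struct obj *typesafe_lookup(struct key *key)
{
	struct obj *p;

	rcu_read_lock();
retry:
	p = find_candidate(key);		/* RCU-protected hash walk */
	if (p) {
		if (!atomic_inc_not_zero(&p->refcnt))
			goto retry;		/* object is being freed */
		if (!key_matches(p, key)) {	/* memory reused for another object */
			put_obj(p);
			goto retry;
		}
	}
	rcu_read_unlock();
	return p;
}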
|  |  | |||
|  | @ -413,7 +413,7 @@ void kasan_cache_create(struct kmem_cache *cache, size_t *size, | |||
| 	*size += sizeof(struct kasan_alloc_meta); | ||||
| 
 | ||||
| 	/* Add free meta. */ | ||||
| 	if (cache->flags & SLAB_DESTROY_BY_RCU || cache->ctor || | ||||
| 	if (cache->flags & SLAB_TYPESAFE_BY_RCU || cache->ctor || | ||||
| 	    cache->object_size < sizeof(struct kasan_free_meta)) { | ||||
| 		cache->kasan_info.free_meta_offset = *size; | ||||
| 		*size += sizeof(struct kasan_free_meta); | ||||
|  | @ -561,7 +561,7 @@ static void kasan_poison_slab_free(struct kmem_cache *cache, void *object) | |||
| 	unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE); | ||||
| 
 | ||||
| 	/* RCU slabs could be legally used after free within the RCU period */ | ||||
| 	if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) | ||||
| 	if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) | ||||
| 		return; | ||||
| 
 | ||||
| 	kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE); | ||||
|  | @ -572,7 +572,7 @@ bool kasan_slab_free(struct kmem_cache *cache, void *object) | |||
| 	s8 shadow_byte; | ||||
| 
 | ||||
| 	/* RCU slabs could be legally used after free within the RCU period */ | ||||
| 	if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU)) | ||||
| 	if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU)) | ||||
| 		return false; | ||||
| 
 | ||||
| 	shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(object)); | ||||
|  |  | |||
|  | @ -95,7 +95,7 @@ void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, | |||
| void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size) | ||||
| { | ||||
| 	/* TODO: RCU freeing is unsupported for now; hide false positives. */ | ||||
| 	if (!s->ctor && !(s->flags & SLAB_DESTROY_BY_RCU)) | ||||
| 	if (!s->ctor && !(s->flags & SLAB_TYPESAFE_BY_RCU)) | ||||
| 		kmemcheck_mark_freed(object, size); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -21,7 +21,7 @@ | |||
| #include <linux/slab.h> | ||||
| 
 | ||||
| /* global SRCU for all MMs */ | ||||
| static struct srcu_struct srcu; | ||||
| DEFINE_STATIC_SRCU(srcu); | ||||
| 
 | ||||
| /*
 | ||||
|  * This function allows mmu_notifier::release callback to delay a call to | ||||
|  | @ -252,12 +252,6 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn, | |||
| 
 | ||||
| 	BUG_ON(atomic_read(&mm->mm_users) <= 0); | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * Verify that mmu_notifier_init() already run and the global srcu is | ||||
| 	 * initialized. | ||||
| 	 */ | ||||
| 	BUG_ON(!srcu.per_cpu_ref); | ||||
| 
 | ||||
| 	ret = -ENOMEM; | ||||
| 	mmu_notifier_mm = kmalloc(sizeof(struct mmu_notifier_mm), GFP_KERNEL); | ||||
| 	if (unlikely(!mmu_notifier_mm)) | ||||
|  | @ -406,9 +400,3 @@ void mmu_notifier_unregister_no_release(struct mmu_notifier *mn, | |||
| 	mmdrop(mm); | ||||
| } | ||||
| EXPORT_SYMBOL_GPL(mmu_notifier_unregister_no_release); | ||||
| 
 | ||||
| static int __init mmu_notifier_init(void) | ||||
| { | ||||
| 	return init_srcu_struct(&srcu); | ||||
| } | ||||
| subsys_initcall(mmu_notifier_init); | ||||
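Switching from a dynamically initialized srcu_struct to DEFINE_STATIC_SRCU() is what lets the hunks above drop both the mmu_notifier_init() initcall and the BUG_ON(!srcu.per_cpu_ref) readiness check: a statically defined SRCU domain is usable from the first reference. A minimal sketch of the pattern, with illustrative names (example_srcu, example_reader(), example_writer()):

#include <linux/srcu.h>

/* Sketch of the statically-initialized SRCU pattern; names are illustrative. */
DEFINE_STATIC_SRCU(example_srcu);

static void example_reader(void)
{
	int idx;

	idx = srcu_read_lock(&example_srcu);
	/* ... dereference example_srcu-protected state ... */
	srcu_read_unlock(&example_srcu, idx);
}

static void example_writer(void)
{
	/* ... unpublish the old state ... */
	synchronize_srcu(&example_srcu);	/* wait for pre-existing readers */
	/* ... now safe to free it ... */
}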
|  |  | |||
|  | @ -430,7 +430,7 @@ static void anon_vma_ctor(void *data) | |||
| void __init anon_vma_init(void) | ||||
| { | ||||
| 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma), | ||||
| 			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, | ||||
| 			0, SLAB_TYPESAFE_BY_RCU|SLAB_PANIC|SLAB_ACCOUNT, | ||||
| 			anon_vma_ctor); | ||||
| 	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, | ||||
| 			SLAB_PANIC|SLAB_ACCOUNT); | ||||
|  | @ -481,7 +481,7 @@ struct anon_vma *page_get_anon_vma(struct page *page) | |||
| 	 * If this page is still mapped, then its anon_vma cannot have been | ||||
| 	 * freed.  But if it has been unmapped, we have no security against the | ||||
| 	 * anon_vma structure being freed and reused (for another anon_vma: | ||||
| 	 * SLAB_DESTROY_BY_RCU guarantees that - so the atomic_inc_not_zero() | ||||
| 	 * SLAB_TYPESAFE_BY_RCU guarantees that - so the atomic_inc_not_zero() | ||||
| 	 * above cannot corrupt). | ||||
| 	 */ | ||||
| 	if (!page_mapped(page)) { | ||||
|  |  | |||
|  | @ -1728,7 +1728,7 @@ static void slab_destroy(struct kmem_cache *cachep, struct page *page) | |||
| 
 | ||||
| 	freelist = page->freelist; | ||||
| 	slab_destroy_debugcheck(cachep, page); | ||||
| 	if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) | ||||
| 	if (unlikely(cachep->flags & SLAB_TYPESAFE_BY_RCU)) | ||||
| 		call_rcu(&page->rcu_head, kmem_rcu_free); | ||||
| 	else | ||||
| 		kmem_freepages(cachep, page); | ||||
|  | @ -1924,7 +1924,7 @@ static bool set_objfreelist_slab_cache(struct kmem_cache *cachep, | |||
| 
 | ||||
| 	cachep->num = 0; | ||||
| 
 | ||||
| 	if (cachep->ctor || flags & SLAB_DESTROY_BY_RCU) | ||||
| 	if (cachep->ctor || flags & SLAB_TYPESAFE_BY_RCU) | ||||
| 		return false; | ||||
| 
 | ||||
| 	left = calculate_slab_order(cachep, size, | ||||
|  | @ -2030,7 +2030,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
| 	if (size < 4096 || fls(size - 1) == fls(size-1 + REDZONE_ALIGN + | ||||
| 						2 * sizeof(unsigned long long))) | ||||
| 		flags |= SLAB_RED_ZONE | SLAB_STORE_USER; | ||||
| 	if (!(flags & SLAB_DESTROY_BY_RCU)) | ||||
| 	if (!(flags & SLAB_TYPESAFE_BY_RCU)) | ||||
| 		flags |= SLAB_POISON; | ||||
| #endif | ||||
| #endif | ||||
|  |  | |||
|  | @ -126,7 +126,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size, | |||
| 
 | ||||
| /* Legal flag mask for kmem_cache_create(), for various configurations */ | ||||
| #define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \ | ||||
| 			 SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS ) | ||||
| 			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS ) | ||||
| 
 | ||||
| #if defined(CONFIG_DEBUG_SLAB) | ||||
| #define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER) | ||||
|  | @ -415,7 +415,7 @@ static inline size_t slab_ksize(const struct kmem_cache *s) | |||
| 	 * back there or track user information then we can | ||||
| 	 * only use the space before that information. | ||||
| 	 */ | ||||
| 	if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) | ||||
| 	if (s->flags & (SLAB_TYPESAFE_BY_RCU | SLAB_STORE_USER)) | ||||
| 		return s->inuse; | ||||
| 	/*
 | ||||
| 	 * Else we can use all the padding etc for the allocation | ||||
|  |  | |||
|  | @ -39,7 +39,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work, | |||
|  * Set of flags that will prevent slab merging | ||||
|  */ | ||||
| #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ | ||||
| 		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \ | ||||
| 		SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \ | ||||
| 		SLAB_FAILSLAB | SLAB_KASAN) | ||||
| 
 | ||||
| #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \ | ||||
|  | @ -500,7 +500,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work) | |||
| 	struct kmem_cache *s, *s2; | ||||
| 
 | ||||
| 	/*
 | ||||
| 	 * On destruction, SLAB_DESTROY_BY_RCU kmem_caches are put on the | ||||
| 	 * On destruction, SLAB_TYPESAFE_BY_RCU kmem_caches are put on the | ||||
| 	 * @slab_caches_to_rcu_destroy list.  The slab pages are freed | ||||
| 	 * through RCU and the associated kmem_cache is dereferenced | ||||
| 	 * while freeing the pages, so the kmem_caches should be freed only | ||||
|  | @ -537,7 +537,7 @@ static int shutdown_cache(struct kmem_cache *s) | |||
| 	memcg_unlink_cache(s); | ||||
| 	list_del(&s->list); | ||||
| 
 | ||||
| 	if (s->flags & SLAB_DESTROY_BY_RCU) { | ||||
| 	if (s->flags & SLAB_TYPESAFE_BY_RCU) { | ||||
| 		list_add_tail(&s->list, &slab_caches_to_rcu_destroy); | ||||
| 		schedule_work(&slab_caches_to_rcu_destroy_work); | ||||
| 	} else { | ||||
|  |  | |||
|  | @ -126,7 +126,7 @@ static inline void clear_slob_page_free(struct page *sp) | |||
| 
 | ||||
| /*
 | ||||
|  * struct slob_rcu is inserted at the tail of allocated slob blocks, which | ||||
|  * were created with a SLAB_DESTROY_BY_RCU slab. slob_rcu is used to free | ||||
|  * were created with a SLAB_TYPESAFE_BY_RCU slab. slob_rcu is used to free | ||||
|  * the block using call_rcu. | ||||
|  */ | ||||
| struct slob_rcu { | ||||
|  | @ -524,7 +524,7 @@ EXPORT_SYMBOL(ksize); | |||
| 
 | ||||
| int __kmem_cache_create(struct kmem_cache *c, unsigned long flags) | ||||
| { | ||||
| 	if (flags & SLAB_DESTROY_BY_RCU) { | ||||
| 	if (flags & SLAB_TYPESAFE_BY_RCU) { | ||||
| 		/* leave room for rcu footer at the end of object */ | ||||
| 		c->size += sizeof(struct slob_rcu); | ||||
| 	} | ||||
|  | @ -598,7 +598,7 @@ static void kmem_rcu_free(struct rcu_head *head) | |||
| void kmem_cache_free(struct kmem_cache *c, void *b) | ||||
| { | ||||
| 	kmemleak_free_recursive(b, c->flags); | ||||
| 	if (unlikely(c->flags & SLAB_DESTROY_BY_RCU)) { | ||||
| 	if (unlikely(c->flags & SLAB_TYPESAFE_BY_RCU)) { | ||||
| 		struct slob_rcu *slob_rcu; | ||||
| 		slob_rcu = b + (c->size - sizeof(struct slob_rcu)); | ||||
| 		slob_rcu->size = c->size; | ||||
|  |  | |||
							
								
								
									
|  | @ -1687,7 +1687,7 @@ static void rcu_free_slab(struct rcu_head *h) | |||
| 
 | ||||
| static void free_slab(struct kmem_cache *s, struct page *page) | ||||
| { | ||||
| 	if (unlikely(s->flags & SLAB_DESTROY_BY_RCU)) { | ||||
| 	if (unlikely(s->flags & SLAB_TYPESAFE_BY_RCU)) { | ||||
| 		struct rcu_head *head; | ||||
| 
 | ||||
| 		if (need_reserve_slab_rcu) { | ||||
|  | @ -2963,7 +2963,7 @@ static __always_inline void slab_free(struct kmem_cache *s, struct page *page, | |||
| 	 * slab_free_freelist_hook() could have put the items into quarantine. | ||||
| 	 * If so, no need to free them. | ||||
| 	 */ | ||||
| 	if (s->flags & SLAB_KASAN && !(s->flags & SLAB_DESTROY_BY_RCU)) | ||||
| 	if (s->flags & SLAB_KASAN && !(s->flags & SLAB_TYPESAFE_BY_RCU)) | ||||
| 		return; | ||||
| 	do_slab_free(s, page, head, tail, cnt, addr); | ||||
| } | ||||
|  | @ -3433,7 +3433,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
| 	 * the slab may touch the object after free or before allocation | ||||
| 	 * then we should never poison the object itself. | ||||
| 	 */ | ||||
| 	if ((flags & SLAB_POISON) && !(flags & SLAB_DESTROY_BY_RCU) && | ||||
| 	if ((flags & SLAB_POISON) && !(flags & SLAB_TYPESAFE_BY_RCU) && | ||||
| 			!s->ctor) | ||||
| 		s->flags |= __OBJECT_POISON; | ||||
| 	else | ||||
|  | @ -3455,7 +3455,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order) | |||
| 	 */ | ||||
| 	s->inuse = size; | ||||
| 
 | ||||
| 	if (((flags & (SLAB_DESTROY_BY_RCU | SLAB_POISON)) || | ||||
| 	if (((flags & (SLAB_TYPESAFE_BY_RCU | SLAB_POISON)) || | ||||
| 		s->ctor)) { | ||||
| 		/*
 | ||||
| 		 * Relocate free pointer after the object if it is not | ||||
|  | @ -3537,7 +3537,7 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags) | |||
| 	s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor); | ||||
| 	s->reserved = 0; | ||||
| 
 | ||||
| 	if (need_reserve_slab_rcu && (s->flags & SLAB_DESTROY_BY_RCU)) | ||||
| 	if (need_reserve_slab_rcu && (s->flags & SLAB_TYPESAFE_BY_RCU)) | ||||
| 		s->reserved = sizeof(struct rcu_head); | ||||
| 
 | ||||
| 	if (!calculate_sizes(s, -1)) | ||||
|  | @ -5042,7 +5042,7 @@ SLAB_ATTR_RO(cache_dma); | |||
| 
 | ||||
| static ssize_t destroy_by_rcu_show(struct kmem_cache *s, char *buf) | ||||
| { | ||||
| 	return sprintf(buf, "%d\n", !!(s->flags & SLAB_DESTROY_BY_RCU)); | ||||
| 	return sprintf(buf, "%d\n", !!(s->flags & SLAB_TYPESAFE_BY_RCU)); | ||||
| } | ||||
| SLAB_ATTR_RO(destroy_by_rcu); | ||||
| 
 | ||||
|  |  | |||
|  | @ -951,7 +951,7 @@ static struct proto dccp_v4_prot = { | |||
| 	.orphan_count		= &dccp_orphan_count, | ||||
| 	.max_header		= MAX_DCCP_HEADER, | ||||
| 	.obj_size		= sizeof(struct dccp_sock), | ||||
| 	.slab_flags		= SLAB_DESTROY_BY_RCU, | ||||
| 	.slab_flags		= SLAB_TYPESAFE_BY_RCU, | ||||
| 	.rsk_prot		= &dccp_request_sock_ops, | ||||
| 	.twsk_prot		= &dccp_timewait_sock_ops, | ||||
| 	.h.hashinfo		= &dccp_hashinfo, | ||||
|  |  | |||
|  | @ -1014,7 +1014,7 @@ static struct proto dccp_v6_prot = { | |||
| 	.orphan_count	   = &dccp_orphan_count, | ||||
| 	.max_header	   = MAX_DCCP_HEADER, | ||||
| 	.obj_size	   = sizeof(struct dccp6_sock), | ||||
| 	.slab_flags	   = SLAB_DESTROY_BY_RCU, | ||||
| 	.slab_flags	   = SLAB_TYPESAFE_BY_RCU, | ||||
| 	.rsk_prot	   = &dccp6_request_sock_ops, | ||||
| 	.twsk_prot	   = &dccp6_timewait_sock_ops, | ||||
| 	.h.hashinfo	   = &dccp_hashinfo, | ||||
|  |  | |||
|  | @ -2402,7 +2402,7 @@ struct proto tcp_prot = { | |||
| 	.sysctl_rmem		= sysctl_tcp_rmem, | ||||
| 	.max_header		= MAX_TCP_HEADER, | ||||
| 	.obj_size		= sizeof(struct tcp_sock), | ||||
| 	.slab_flags		= SLAB_DESTROY_BY_RCU, | ||||
| 	.slab_flags		= SLAB_TYPESAFE_BY_RCU, | ||||
| 	.twsk_prot		= &tcp_timewait_sock_ops, | ||||
| 	.rsk_prot		= &tcp_request_sock_ops, | ||||
| 	.h.hashinfo		= &tcp_hashinfo, | ||||
|  |  | |||
|  | @ -1921,7 +1921,7 @@ struct proto tcpv6_prot = { | |||
| 	.sysctl_rmem		= sysctl_tcp_rmem, | ||||
| 	.max_header		= MAX_TCP_HEADER, | ||||
| 	.obj_size		= sizeof(struct tcp6_sock), | ||||
| 	.slab_flags		= SLAB_DESTROY_BY_RCU, | ||||
| 	.slab_flags		= SLAB_TYPESAFE_BY_RCU, | ||||
| 	.twsk_prot		= &tcp6_timewait_sock_ops, | ||||
| 	.rsk_prot		= &tcp6_request_sock_ops, | ||||
| 	.h.hashinfo		= &tcp_hashinfo, | ||||
|  |  | |||
|  | @ -142,7 +142,7 @@ static struct proto llc_proto = { | |||
| 	.name	  = "LLC", | ||||
| 	.owner	  = THIS_MODULE, | ||||
| 	.obj_size = sizeof(struct llc_sock), | ||||
| 	.slab_flags = SLAB_DESTROY_BY_RCU, | ||||
| 	.slab_flags = SLAB_TYPESAFE_BY_RCU, | ||||
| }; | ||||
| 
 | ||||
| /**
 | ||||
|  |  | |||
|  | @ -506,7 +506,7 @@ static struct sock *__llc_lookup_established(struct llc_sap *sap, | |||
| again: | ||||
| 	sk_nulls_for_each_rcu(rc, node, laddr_hb) { | ||||
| 		if (llc_estab_match(sap, daddr, laddr, rc)) { | ||||
| 			/* Extra checks required by SLAB_DESTROY_BY_RCU */ | ||||
| 			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */ | ||||
| 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) | ||||
| 				goto again; | ||||
| 			if (unlikely(llc_sk(rc)->sap != sap || | ||||
|  | @ -565,7 +565,7 @@ static struct sock *__llc_lookup_listener(struct llc_sap *sap, | |||
| again: | ||||
| 	sk_nulls_for_each_rcu(rc, node, laddr_hb) { | ||||
| 		if (llc_listener_match(sap, laddr, rc)) { | ||||
| 			/* Extra checks required by SLAB_DESTROY_BY_RCU */ | ||||
| 			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */ | ||||
| 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) | ||||
| 				goto again; | ||||
| 			if (unlikely(llc_sk(rc)->sap != sap || | ||||
|  |  | |||
|  | @ -328,7 +328,7 @@ static struct sock *llc_lookup_dgram(struct llc_sap *sap, | |||
| again: | ||||
| 	sk_nulls_for_each_rcu(rc, node, laddr_hb) { | ||||
| 		if (llc_dgram_match(sap, laddr, rc)) { | ||||
| 			/* Extra checks required by SLAB_DESTROY_BY_RCU */ | ||||
| 			/* Extra checks required by SLAB_TYPESAFE_BY_RCU */ | ||||
| 			if (unlikely(!atomic_inc_not_zero(&rc->sk_refcnt))) | ||||
| 				goto again; | ||||
| 			if (unlikely(llc_sk(rc)->sap != sap || | ||||
|  |  | |||
|  | @ -918,7 +918,7 @@ static unsigned int early_drop_list(struct net *net, | |||
| 			continue; | ||||
| 
 | ||||
| 		/* kill only if still in same netns -- might have moved due to
 | ||||
| 		 * SLAB_DESTROY_BY_RCU rules. | ||||
| 		 * SLAB_TYPESAFE_BY_RCU rules. | ||||
| 		 * | ||||
| 		 * We steal the timer reference.  If that fails timer has | ||||
| 		 * already fired or someone else deleted it. Just drop ref | ||||
|  | @ -1073,7 +1073,7 @@ __nf_conntrack_alloc(struct net *net, | |||
| 
 | ||||
| 	/*
 | ||||
| 	 * Do not use kmem_cache_zalloc(), as this cache uses | ||||
| 	 * SLAB_DESTROY_BY_RCU. | ||||
| 	 * SLAB_TYPESAFE_BY_RCU. | ||||
| 	 */ | ||||
| 	ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); | ||||
| 	if (ct == NULL) | ||||
|  | @ -1118,7 +1118,7 @@ void nf_conntrack_free(struct nf_conn *ct) | |||
| 	struct net *net = nf_ct_net(ct); | ||||
| 
 | ||||
| 	/* A freed object has refcnt == 0, that's
 | ||||
| 	 * the golden rule for SLAB_DESTROY_BY_RCU | ||||
| 	 * the golden rule for SLAB_TYPESAFE_BY_RCU | ||||
| 	 */ | ||||
| 	NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0); | ||||
| 
 | ||||
|  | @ -1882,7 +1882,7 @@ int nf_conntrack_init_start(void) | |||
| 	nf_conntrack_cachep = kmem_cache_create("nf_conntrack", | ||||
| 						sizeof(struct nf_conn), | ||||
| 						NFCT_INFOMASK + 1, | ||||
| 						SLAB_DESTROY_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); | ||||
| 						SLAB_TYPESAFE_BY_RCU | SLAB_HWCACHE_ALIGN, NULL); | ||||
| 	if (!nf_conntrack_cachep) | ||||
| 		goto err_cachep; | ||||
| 
 | ||||
|  |  | |||
|  | @ -101,7 +101,7 @@ struct proto smc_proto = { | |||
| 	.unhash		= smc_unhash_sk, | ||||
| 	.obj_size	= sizeof(struct smc_sock), | ||||
| 	.h.smc_hash	= &smc_v4_hashinfo, | ||||
| 	.slab_flags	= SLAB_DESTROY_BY_RCU, | ||||
| 	.slab_flags	= SLAB_TYPESAFE_BY_RCU, | ||||
| }; | ||||
| EXPORT_SYMBOL_GPL(smc_proto); | ||||
| 
 | ||||
|  |  | |||
|  | @ -170,7 +170,7 @@ qemu_append="`identify_qemu_append "$QEMU"`" | |||
| # Pull in Kconfig-fragment boot parameters | ||||
| boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" | ||||
| # Generate kernel-version-specific boot parameters | ||||
| boot_args="`per_version_boot_params "$boot_args" $builddir/.config $seconds`" | ||||
| boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`" | ||||
| 
 | ||||
| if test -n "$TORTURE_BUILDONLY" | ||||
| then | ||||
|  |  | |||