mm: page_alloc: tighten up find_suitable_fallback()

find_suitable_fallback() is not as efficient as it could be, and somewhat
difficult to follow.

1. should_try_claim_block() is a loop invariant: it depends only on the
   order and the migratetype, not on the free area being scanned. There
   is no point in checking fallback areas if the caller is interested
   in claimable blocks but the order and the migratetype don't allow
   for that (see the condensed helper after this list).

2. __rmqueue_steal() doesn't care about claimability, so it shouldn't
   have to run those tests.
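
To make point 1 concrete, here is the reshaped helper in condensed form
(a sketch of the logic in the diff below, not the verbatim hunk): the
claimability test is hoisted so it runs once, before any free list is
inspected, and __rmqueue_steal() skips it by passing claimable == false:

	int find_suitable_fallback(struct free_area *area, unsigned int order,
				   int migratetype, bool claimable)
	{
		int i;

		/* Loop-invariant test, hoisted out of the fallback scan */
		if (claimable && !should_try_claim_block(order, migratetype))
			return -2;

		if (area->nr_free == 0)
			return -1;

		for (i = 0; i < MIGRATE_PCPTYPES - 1; i++) {
			int fallback_mt = fallbacks[migratetype][i];

			if (!free_area_empty(area, fallback_mt))
				return fallback_mt;
		}

		return -1;
	}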

Different callers want different things from this helper:

1. __compact_finished() scans orders up until it finds a claimable block
2. __rmqueue_claim() scans orders down as long as blocks are claimable
3. __rmqueue_steal() doesn't care about claimability at all

Move should_try_claim_block() out of the loop. Only test it for the
two callers who care in the first place. Distinguish "no blocks" from
"order + mt are not claimable" in the return value; __rmqueue_claim()
can stop once order becomes unclaimable, __compact_finished() can keep
advancing until order becomes claimable.
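
The resulting return-value contract, as the callers below use it, looks
roughly like this (a sketch distilled from the diff, not verbatim code):

	/*
	 *  >= 0  suitable fallback migratetype found
	 *  -1    no fallback pages at this order
	 *  -2    order/migratetype not claimable (only when claimable == true)
	 */

	/* __rmqueue_claim(): scans orders downward with claimable == true */
	if (fallback_mt == -1)
		continue;	/* no block in that order, try a lower one */
	if (fallback_mt == -2)
		break;		/* advanced into orders too low to claim, abort */

	/* __compact_finished(): scans orders upward with claimable == true */
	if (find_suitable_fallback(area, order, migratetype, true) >= 0)
		return COMPACT_SUCCESS;	/* a claimable block exists */

	/* __rmqueue_steal(): claimable == false, only -1 vs. >= 0 matter */
	if (fallback_mt == -1)
		continue;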

Before:

 Performance counter stats for './run case-lru-file-mmap-read' (5 runs):

          85,294.85 msec task-clock                       #    5.644 CPUs utilized               ( +-  0.32% )
             15,968      context-switches                 #  187.209 /sec                        ( +-  3.81% )
                153      cpu-migrations                   #    1.794 /sec                        ( +-  3.29% )
            801,808      page-faults                      #    9.400 K/sec                       ( +-  0.10% )
    733,358,331,786      instructions                     #    1.87  insn per cycle              ( +-  0.20% )  (64.94%)
    392,622,904,199      cycles                           #    4.603 GHz                         ( +-  0.31% )  (64.84%)
    148,563,488,531      branches                         #    1.742 G/sec                       ( +-  0.18% )  (63.86%)
        152,143,228      branch-misses                    #    0.10% of all branches             ( +-  1.19% )  (62.82%)

            15.1128 +- 0.0637 seconds time elapsed  ( +-  0.42% )

After:

 Performance counter stats for './run case-lru-file-mmap-read' (5 runs):

         84,380.21 msec task-clock                       #    5.664 CPUs utilized               ( +-  0.21% )
            16,656      context-switches                 #  197.392 /sec                        ( +-  3.27% )
               151      cpu-migrations                   #    1.790 /sec                        ( +-  3.28% )
           801,703      page-faults                      #    9.501 K/sec                       ( +-  0.09% )
   731,914,183,060      instructions                     #    1.88  insn per cycle              ( +-  0.38% )  (64.90%)
   388,673,535,116      cycles                           #    4.606 GHz                         ( +-  0.24% )  (65.06%)
   148,251,482,143      branches                         #    1.757 G/sec                       ( +-  0.37% )  (63.92%)
       149,766,550      branch-misses                    #    0.10% of all branches             ( +-  1.22% )  (62.88%)

           14.8968 +- 0.0486 seconds time elapsed  ( +-  0.33% )

Link: https://lkml.kernel.org/r/20250407180154.63348-2-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Brendan Jackman <jackmanb@google.com>
Tested-by: Shivank Garg <shivankg@amd.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Carlos Song <carlos.song@nxp.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 3 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2344,7 +2344,6 @@ static enum compact_result __compact_finished(struct compact_control *cc)
 	ret = COMPACT_NO_SUITABLE_PAGE;
 	for (order = cc->order; order < NR_PAGE_ORDERS; order++) {
 		struct free_area *area = &cc->zone->free_area[order];
-		bool claim_block;
 
 		/* Job done if page is free of the right migratetype */
 		if (!free_area_empty(area, migratetype))
@@ -2360,8 +2359,7 @@ static enum compact_result __compact_finished(struct compact_control *cc)
 		 * Job done if allocation would steal freepages from
 		 * other migratetype buddy lists.
 		 */
-		if (find_suitable_fallback(area, order, migratetype,
-						true, &claim_block) != -1)
+		if (find_suitable_fallback(area, order, migratetype, true) >= 0)
 			/*
 			 * Movable pages are OK in any pageblock. If we are
 			 * stealing for a non-movable allocation, make sure

diff --git a/mm/internal.h b/mm/internal.h
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -910,7 +910,7 @@ static inline void init_cma_pageblock(struct page *page)
 
 int find_suitable_fallback(struct free_area *area, unsigned int order,
-			  int migratetype, bool claim_only, bool *claim_block);
+			  int migratetype, bool claimable);
 
 static inline bool free_area_empty(struct free_area *area, int migratetype)
 {

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2068,31 +2068,25 @@ static bool should_try_claim_block(unsigned int order, int start_mt)
 
 /*
  * Check whether there is a suitable fallback freepage with requested order.
- * Sets *claim_block to instruct the caller whether it should convert a whole
- * pageblock to the returned migratetype.
- * If only_claim is true, this function returns fallback_mt only if
+ * If claimable is true, this function returns fallback_mt only if
  * we would do this whole-block claiming. This would help to reduce
  * fragmentation due to mixed migratetype pages in one pageblock.
  */
 int find_suitable_fallback(struct free_area *area, unsigned int order,
-			int migratetype, bool only_claim, bool *claim_block)
+			   int migratetype, bool claimable)
 {
 	int i;
-	int fallback_mt;
+
+	if (claimable && !should_try_claim_block(order, migratetype))
+		return -2;
 
 	if (area->nr_free == 0)
 		return -1;
 
-	*claim_block = false;
 	for (i = 0; i < MIGRATE_PCPTYPES - 1 ; i++) {
-		fallback_mt = fallbacks[migratetype][i];
-		if (free_area_empty(area, fallback_mt))
-			continue;
+		int fallback_mt = fallbacks[migratetype][i];
 
-		if (should_try_claim_block(order, migratetype))
-			*claim_block = true;
-
-		if (*claim_block || !only_claim)
+		if (!free_area_empty(area, fallback_mt))
 			return fallback_mt;
 	}
 
@@ -2189,7 +2183,6 @@ __rmqueue_claim(struct zone *zone, int order, int start_migratetype,
 	int min_order = order;
 	struct page *page;
 	int fallback_mt;
-	bool claim_block;
 
 	/*
 	 * Do not steal pages from freelists belonging to other pageblocks
@@ -2208,11 +2201,14 @@ __rmqueue_claim(struct zone *zone, int order, int start_migratetype,
 	     --current_order) {
 		area = &(zone->free_area[current_order]);
 		fallback_mt = find_suitable_fallback(area, current_order,
-				start_migratetype, false, &claim_block);
+						     start_migratetype, true);
+
+		/* No block in that order */
 		if (fallback_mt == -1)
 			continue;
 
-		if (!claim_block)
+		/* Advanced into orders too low to claim, abort */
+		if (fallback_mt == -2)
 			break;
 
 		page = get_page_from_free_area(area, fallback_mt);
@@ -2240,12 +2236,11 @@ __rmqueue_steal(struct zone *zone, int order, int start_migratetype)
 	int current_order;
 	struct page *page;
 	int fallback_mt;
-	bool claim_block;
 
 	for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) {
 		area = &(zone->free_area[current_order]);
 		fallback_mt = find_suitable_fallback(area, current_order,
-				start_migratetype, false, &claim_block);
+						     start_migratetype, false);
 		if (fallback_mt == -1)
 			continue;