linux/drivers/mtd/ubi/fastmap-wl.c

568 lines
14 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2012 Linutronix GmbH
* Copyright (c) 2014 sigma star gmbh
* Author: Richard Weinberger <richard@nod.at>
*/
/**
 * update_fastmap_work_fn - work-queue callback that rewrites the fastmap
 * @wrk: work item embedded in the owning UBI device (its fm_work member)
 *
 * Recovers the UBI device that contains @wrk, runs ubi_update_fastmap() on
 * it and finally resets the device's fm_work_scheduled flag.
 */
static void update_fastmap_work_fn(struct work_struct *wrk)
{
	struct ubi_device *dev;

	dev = container_of(wrk, struct ubi_device, fm_work);
	ubi_update_fastmap(dev);

	/* Clear the "scheduled" flag under wl_lock. */
	spin_lock(&dev->wl_lock);
	dev->fm_work_scheduled = 0;
	spin_unlock(&dev->wl_lock);
}
/**
 * find_anchor_wl_entry - pick the entry to be used as fastmap anchor PEB.
 * @root: the RB-tree to search
 *
 * Visits every entry of @root and, among those whose PEB number is below
 * UBI_FM_MAX_START, returns the one with the smallest erase counter (the
 * first such entry visited wins a tie). Entries whose erase counter is not
 * below UBI_MAX_ERASECOUNTER are never selected. Returns %NULL when no
 * entry qualifies.
 */
static struct ubi_wl_entry *find_anchor_wl_entry(struct rb_root *root)
{
	struct ubi_wl_entry *cur, *best = NULL;
	struct rb_node *node;
	int lowest_ec = UBI_MAX_ERASECOUNTER;

	ubi_rb_for_each_entry(node, cur, root, u.rb) {
		/* Only PEBs inside the anchor area are candidates. */
		if (cur->pnum < UBI_FM_MAX_START) {
			if (cur->ec < lowest_ec) {
				lowest_ec = cur->ec;
				best = cur;
			}
		}
	}

	return best;
}
/*
 * return_unused_peb - hand a single PEB back to the free tree.
 * @ubi: UBI device description object
 * @e: wear-leveling entry of the PEB being returned
 *
 * Inserts @e into ubi->free and increments ubi->free_count to match.
 */
static inline void return_unused_peb(struct ubi_device *ubi,
				     struct ubi_wl_entry *e)
{
	struct rb_root *free_tree = &ubi->free;

	wl_tree_add(e, free_tree);
	ubi->free_count += 1;
}
/**
 * return_unused_pool_pebs - give the unconsumed PEBs of a pool back.
 * @ubi: UBI device description object
 * @pool: fastmap pool description object
 *
 * Each slot of @pool from index pool->used up to (but not including)
 * pool->size is resolved through ubi->lookuptbl and re-added to the free
 * tree. The fields of @pool are only read here, never written.
 */
static void return_unused_pool_pebs(struct ubi_device *ubi,
				    struct ubi_fm_pool *pool)
{
	struct ubi_wl_entry *entry;
	int slot = pool->used;

	while (slot < pool->size) {
		entry = ubi->lookuptbl[pool->pebs[slot]];
		return_unused_peb(ubi, entry);
		slot++;
	}
}
/**
 * ubi_wl_get_fm_peb - take a free PEB out of the wl subsystem for fastmap.
 * @ubi: UBI device description object
 * @anchor: non-zero if the PEB will be used as anchor PEB by fastmap
 *
 * With @anchor set the candidate is chosen by find_anchor_wl_entry(),
 * otherwise by find_mean_wl_entry(). The chosen entry is unlinked from the
 * free tree and the free counter is decremented.
 *
 * Returns the entry, or %NULL when the free tree is empty or no suitable
 * entry was found.
 * Must be called with wl_lock held!
 */
struct ubi_wl_entry *ubi_wl_get_fm_peb(struct ubi_device *ubi, int anchor)
{
	struct ubi_wl_entry *picked;

	if (!ubi->free.rb_node)
		return NULL;

	picked = anchor ? find_anchor_wl_entry(&ubi->free)
			: find_mean_wl_entry(ubi, &ubi->free);
	if (!picked)
		return NULL;

	self_check_in_wl_tree(ubi, picked, &ubi->free);

	/*
	 * Unlink it from the free tree; from here on the wl subsystem no
	 * longer knows about this erase block.
	 */
	rb_erase(&picked->u.rb, &ubi->free);
	ubi->free_count--;

	return picked;
}
/*
* wait_free_pebs_for_pool - wait until there enough free pebs
* @ubi: UBI device description object
*
* Wait and execute do_work until there are enough free pebs, fill pool
* as much as we can. This will reduce pool refilling times, which can
* reduce the fastmap updating frequency.
*/
static void wait_free_pebs_for_pool(struct ubi_device *ubi)
{
struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
struct ubi_fm_pool *pool = &ubi->fm_pool;
int free, expect_free, executed;
/*
* There are at least following free pebs which reserved by UBI:
* 1. WL_RESERVED_PEBS[1]
* 2. EBA_RESERVED_PEBS[1]
* 3. fm pebs - 1: Twice fastmap size deducted by fastmap and fm_anchor
* 4. beb_rsvd_pebs: This value should be get under lock ubi->wl_lock
*/
int reserved = WL_RESERVED_PEBS + EBA_RESERVED_PEBS +
ubi: fastmap: Fix lapsed wear leveling for first 64 PEBs The anchor PEB must be picked from first 64 PEBs, these PEBs could have large erase counter greater than other PEBs especially when free space is nearly running out. The ubi_update_fastmap will be called as long as pool/wl_pool is empty, old anchor PEB is erased when updating fastmap. Given an UBI device with N PEBs, free PEBs is nearly running out and pool will be filled with 1 PEB every time ubi_update_fastmap invoked. So t=N/POOL_SIZE[1]/64 means that in worst case the erase counter of first 64 PEBs is t times greater than other PEBs in theory. After running fsstress for 24h, the erase counter statistics for two UBI devices shown as follow(CONFIG_MTD_UBI_WL_THRESHOLD=128): Device A(1024 PEBs, pool=50, wl_pool=25): ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 0 0 0 0 100 .. 999: 0 0 0 0 1000 .. 9999: 0 0 0 0 10000 .. 99999: 960 29224 29282 29362 100000 .. inf: 64 117897 117934 117940 --------------------------------------------------------- Total : 1024 29224 34822 117940 Device B(8192 PEBs, pool=256, wl_pool=128): ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 0 0 0 0 100 .. 999: 0 0 0 0 1000 .. 9999: 8128 2253 2321 2387 10000 .. 99999: 64 35387 35387 35388 100000 .. inf: 0 0 0 0 --------------------------------------------------------- Total : 8192 2253 2579 35388 The key point is reducing fastmap updating frequency by enlarging POOL_SIZE, so let UBI reserve ubi->fm_pool.max_size PEBs during attaching. Then POOL_SIZE will become ubi->fm_pool.max_size/2 even in free space running out case. Given an UBI device with 8192 PEBs(16384\8192\4096 is common large-capacity flash), t=8192/128/64=1. 
The fastmap updating will happen in either wl_pool or pool is empty, so setting fm_pool_rsv_cnt as ubi->fm_pool.max_size can fill wl_pool in full state. After pool reservation, running fsstress for 24h: Device A(1024 PEBs, pool=50, wl_pool=25): ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 0 0 0 0 100 .. 999: 0 0 0 0 1000 .. 9999: 0 0 0 0 10000 .. 99999: 1024 33801 33997 34056 100000 .. inf: 0 0 0 0 --------------------------------------------------------- Total : 1024 33801 33997 34056 Device B(8192 PEBs, pool=256, wl_pool=128): ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 0 0 0 0 100 .. 999: 0 0 0 0 1000 .. 9999: 8192 2205 2397 2460 10000 .. 99999: 0 0 0 0 100000 .. inf: 0 0 0 0 --------------------------------------------------------- Total : 8192 2205 2397 2460 The difference of erase counter between first 64 PEBs and others is under WL_FREE_MAX_DIFF(2*UBI_WL_THRESHOLD=2*128=256). Device A: 34056 - 33801 = 255 Device B: 2460 - 2205 = 255 Next patch will add a switch to control whether UBI needs to reserve PEBs for filling pool. Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217787 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2023-08-28 14:38:43 +08:00
ubi->fm_size / ubi->leb_size - 1 + ubi->fm_pool_rsv_cnt;
do {
spin_lock(&ubi->wl_lock);
free = ubi->free_count;
free += pool->size - pool->used + wl_pool->size - wl_pool->used;
expect_free = reserved + ubi->beb_rsvd_pebs;
spin_unlock(&ubi->wl_lock);
/*
* Break out if there are no works or work is executed failure,
* given the fact that erase_worker will schedule itself when
* -EBUSY is returned from mtd layer caused by system shutdown.
*/
if (do_work(ubi, &executed) || !executed)
break;
} while (free < expect_free);
}
ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool not empty There at least 6 PEBs reserved on UBI device: 1. EBA_RESERVED_PEBS[1] 2. WL_RESERVED_PEBS[1] 3. UBI_LAYOUT_VOLUME_EBS[2] 4. MIN_FASTMAP_RESERVED_PEBS[2] When all ubi volumes take all their PEBs, there are 3 (EBA_RESERVED_PEBS + WL_RESERVED_PEBS + MIN_FASTMAP_RESERVED_PEBS - MIN_FASTMAP_TAKEN_PEBS[1]) free PEBs. Since commit f9c34bb529975fe ("ubi: Fix producing anchor PEBs") and commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules") applied, there is only 1 (3 - FASTMAP_ANCHOR_PEBS[1] - FASTMAP_NEXT_ANCHOR_PEBS[1]) free PEB to fill pool and wl_pool, after filling pool, wl_pool is always empty. So, UBI could be stuck in an infinite loop: ubi_thread system_wq wear_leveling_worker <-------------------------------------------------- get_peb_for_wl | // fm_wl_pool, used = size = 0 | schedule_work(&ubi->fm_work) | | update_fastmap_work_fn | ubi_update_fastmap | ubi_refill_pools | // ubi->free_count - ubi->beb_rsvd_pebs < 5 | // wl_pool is not filled with any PEBs | schedule_erase(old_fm_anchor) | ubi_ensure_anchor_pebs | __schedule_ubi_work(wear_leveling_worker) | | __erase_worker | ensure_wear_leveling | __schedule_ubi_work(wear_leveling_worker) -------------------------- , which cause high cpu usage of ubi_bgt: top - 12:10:42 up 5 min, 2 users, load average: 1.76, 0.68, 0.27 Tasks: 123 total, 3 running, 54 sleeping, 0 stopped, 0 zombie PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 1589 root 20 0 0 0 0 R 45.0 0.0 0:38.86 ubi_bgt0d 319 root 20 0 0 0 0 I 15.2 0.0 0:15.29 kworker/0:3-eve 371 root 20 0 0 0 0 I 14.9 0.0 0:12.85 kworker/3:3-eve 20 root 20 0 0 0 0 I 11.3 0.0 0:05.33 kworker/1:0-eve 202 root 20 0 0 0 0 I 11.3 0.0 0:04.93 kworker/2:3-eve In commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules"), there are three key changes: 1) Choose the fastmap anchor when the most free PEBs are available. 
2) Enable anchor move within the anchor area again as it is useful for distributing wear. 3) Import a candidate fm anchor and check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, use the used anchor area PEB with the lowest erase count to replace it. The anchor candidate can be removed, we can check fm_anchor PEB's erase count during wear leveling. Fix it by: 1) Removing 'fm_next_anchor' and check 'fm_anchor' during wear leveling. 2) Preferentially filling one free peb into fm_wl_pool in condition of ubi->free_count > ubi->beb_rsvd_pebs, then try to reserve enough free count for fastmap non anchor pebs after the above prerequisites are met. Then, there are at least 1 PEB in pool and 1 PEB in wl_pool after calling ubi_refill_pools() with all erase works done. Fetch a reproducer in [Link]. Fixes: 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs ... rules") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215407 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2022-05-10 20:31:24 +08:00
/*
* left_free_count - returns the number of free pebs to fill fm pools
ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool not empty There at least 6 PEBs reserved on UBI device: 1. EBA_RESERVED_PEBS[1] 2. WL_RESERVED_PEBS[1] 3. UBI_LAYOUT_VOLUME_EBS[2] 4. MIN_FASTMAP_RESERVED_PEBS[2] When all ubi volumes take all their PEBs, there are 3 (EBA_RESERVED_PEBS + WL_RESERVED_PEBS + MIN_FASTMAP_RESERVED_PEBS - MIN_FASTMAP_TAKEN_PEBS[1]) free PEBs. Since commit f9c34bb529975fe ("ubi: Fix producing anchor PEBs") and commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules") applied, there is only 1 (3 - FASTMAP_ANCHOR_PEBS[1] - FASTMAP_NEXT_ANCHOR_PEBS[1]) free PEB to fill pool and wl_pool, after filling pool, wl_pool is always empty. So, UBI could be stuck in an infinite loop: ubi_thread system_wq wear_leveling_worker <-------------------------------------------------- get_peb_for_wl | // fm_wl_pool, used = size = 0 | schedule_work(&ubi->fm_work) | | update_fastmap_work_fn | ubi_update_fastmap | ubi_refill_pools | // ubi->free_count - ubi->beb_rsvd_pebs < 5 | // wl_pool is not filled with any PEBs | schedule_erase(old_fm_anchor) | ubi_ensure_anchor_pebs | __schedule_ubi_work(wear_leveling_worker) | | __erase_worker | ensure_wear_leveling | __schedule_ubi_work(wear_leveling_worker) -------------------------- , which cause high cpu usage of ubi_bgt: top - 12:10:42 up 5 min, 2 users, load average: 1.76, 0.68, 0.27 Tasks: 123 total, 3 running, 54 sleeping, 0 stopped, 0 zombie PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 1589 root 20 0 0 0 0 R 45.0 0.0 0:38.86 ubi_bgt0d 319 root 20 0 0 0 0 I 15.2 0.0 0:15.29 kworker/0:3-eve 371 root 20 0 0 0 0 I 14.9 0.0 0:12.85 kworker/3:3-eve 20 root 20 0 0 0 0 I 11.3 0.0 0:05.33 kworker/1:0-eve 202 root 20 0 0 0 0 I 11.3 0.0 0:04.93 kworker/2:3-eve In commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules"), there are three key changes: 1) Choose the fastmap anchor when the most free PEBs are available. 
2) Enable anchor move within the anchor area again as it is useful for distributing wear. 3) Import a candidate fm anchor and check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, use the used anchor area PEB with the lowest erase count to replace it. The anchor candidate can be removed, we can check fm_anchor PEB's erase count during wear leveling. Fix it by: 1) Removing 'fm_next_anchor' and check 'fm_anchor' during wear leveling. 2) Preferentially filling one free peb into fm_wl_pool in condition of ubi->free_count > ubi->beb_rsvd_pebs, then try to reserve enough free count for fastmap non anchor pebs after the above prerequisites are met. Then, there are at least 1 PEB in pool and 1 PEB in wl_pool after calling ubi_refill_pools() with all erase works done. Fetch a reproducer in [Link]. Fixes: 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs ... rules") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215407 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2022-05-10 20:31:24 +08:00
* @ubi: UBI device description object
*
* This helper function returns the number of free pebs (deducted
* by fastmap pebs) to fill fm_pool and fm_wl_pool.
ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool not empty There at least 6 PEBs reserved on UBI device: 1. EBA_RESERVED_PEBS[1] 2. WL_RESERVED_PEBS[1] 3. UBI_LAYOUT_VOLUME_EBS[2] 4. MIN_FASTMAP_RESERVED_PEBS[2] When all ubi volumes take all their PEBs, there are 3 (EBA_RESERVED_PEBS + WL_RESERVED_PEBS + MIN_FASTMAP_RESERVED_PEBS - MIN_FASTMAP_TAKEN_PEBS[1]) free PEBs. Since commit f9c34bb529975fe ("ubi: Fix producing anchor PEBs") and commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules") applied, there is only 1 (3 - FASTMAP_ANCHOR_PEBS[1] - FASTMAP_NEXT_ANCHOR_PEBS[1]) free PEB to fill pool and wl_pool, after filling pool, wl_pool is always empty. So, UBI could be stuck in an infinite loop: ubi_thread system_wq wear_leveling_worker <-------------------------------------------------- get_peb_for_wl | // fm_wl_pool, used = size = 0 | schedule_work(&ubi->fm_work) | | update_fastmap_work_fn | ubi_update_fastmap | ubi_refill_pools | // ubi->free_count - ubi->beb_rsvd_pebs < 5 | // wl_pool is not filled with any PEBs | schedule_erase(old_fm_anchor) | ubi_ensure_anchor_pebs | __schedule_ubi_work(wear_leveling_worker) | | __erase_worker | ensure_wear_leveling | __schedule_ubi_work(wear_leveling_worker) -------------------------- , which cause high cpu usage of ubi_bgt: top - 12:10:42 up 5 min, 2 users, load average: 1.76, 0.68, 0.27 Tasks: 123 total, 3 running, 54 sleeping, 0 stopped, 0 zombie PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 1589 root 20 0 0 0 0 R 45.0 0.0 0:38.86 ubi_bgt0d 319 root 20 0 0 0 0 I 15.2 0.0 0:15.29 kworker/0:3-eve 371 root 20 0 0 0 0 I 14.9 0.0 0:12.85 kworker/3:3-eve 20 root 20 0 0 0 0 I 11.3 0.0 0:05.33 kworker/1:0-eve 202 root 20 0 0 0 0 I 11.3 0.0 0:04.93 kworker/2:3-eve In commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules"), there are three key changes: 1) Choose the fastmap anchor when the most free PEBs are available. 
2) Enable anchor move within the anchor area again as it is useful for distributing wear. 3) Import a candidate fm anchor and check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, use the used anchor area PEB with the lowest erase count to replace it. The anchor candidate can be removed, we can check fm_anchor PEB's erase count during wear leveling. Fix it by: 1) Removing 'fm_next_anchor' and check 'fm_anchor' during wear leveling. 2) Preferentially filling one free peb into fm_wl_pool in condition of ubi->free_count > ubi->beb_rsvd_pebs, then try to reserve enough free count for fastmap non anchor pebs after the above prerequisites are met. Then, there are at least 1 PEB in pool and 1 PEB in wl_pool after calling ubi_refill_pools() with all erase works done. Fetch a reproducer in [Link]. Fixes: 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs ... rules") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215407 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2022-05-10 20:31:24 +08:00
*/
static int left_free_count(struct ubi_device *ubi)
ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool not empty There at least 6 PEBs reserved on UBI device: 1. EBA_RESERVED_PEBS[1] 2. WL_RESERVED_PEBS[1] 3. UBI_LAYOUT_VOLUME_EBS[2] 4. MIN_FASTMAP_RESERVED_PEBS[2] When all ubi volumes take all their PEBs, there are 3 (EBA_RESERVED_PEBS + WL_RESERVED_PEBS + MIN_FASTMAP_RESERVED_PEBS - MIN_FASTMAP_TAKEN_PEBS[1]) free PEBs. Since commit f9c34bb529975fe ("ubi: Fix producing anchor PEBs") and commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules") applied, there is only 1 (3 - FASTMAP_ANCHOR_PEBS[1] - FASTMAP_NEXT_ANCHOR_PEBS[1]) free PEB to fill pool and wl_pool, after filling pool, wl_pool is always empty. So, UBI could be stuck in an infinite loop: ubi_thread system_wq wear_leveling_worker <-------------------------------------------------- get_peb_for_wl | // fm_wl_pool, used = size = 0 | schedule_work(&ubi->fm_work) | | update_fastmap_work_fn | ubi_update_fastmap | ubi_refill_pools | // ubi->free_count - ubi->beb_rsvd_pebs < 5 | // wl_pool is not filled with any PEBs | schedule_erase(old_fm_anchor) | ubi_ensure_anchor_pebs | __schedule_ubi_work(wear_leveling_worker) | | __erase_worker | ensure_wear_leveling | __schedule_ubi_work(wear_leveling_worker) -------------------------- , which cause high cpu usage of ubi_bgt: top - 12:10:42 up 5 min, 2 users, load average: 1.76, 0.68, 0.27 Tasks: 123 total, 3 running, 54 sleeping, 0 stopped, 0 zombie PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 1589 root 20 0 0 0 0 R 45.0 0.0 0:38.86 ubi_bgt0d 319 root 20 0 0 0 0 I 15.2 0.0 0:15.29 kworker/0:3-eve 371 root 20 0 0 0 0 I 14.9 0.0 0:12.85 kworker/3:3-eve 20 root 20 0 0 0 0 I 11.3 0.0 0:05.33 kworker/1:0-eve 202 root 20 0 0 0 0 I 11.3 0.0 0:04.93 kworker/2:3-eve In commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules"), there are three key changes: 1) Choose the fastmap anchor when the most free PEBs are available. 
2) Enable anchor move within the anchor area again as it is useful for distributing wear. 3) Import a candidate fm anchor and check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, use the used anchor area PEB with the lowest erase count to replace it. The anchor candidate can be removed, we can check fm_anchor PEB's erase count during wear leveling. Fix it by: 1) Removing 'fm_next_anchor' and check 'fm_anchor' during wear leveling. 2) Preferentially filling one free peb into fm_wl_pool in condition of ubi->free_count > ubi->beb_rsvd_pebs, then try to reserve enough free count for fastmap non anchor pebs after the above prerequisites are met. Then, there are at least 1 PEB in pool and 1 PEB in wl_pool after calling ubi_refill_pools() with all erase works done. Fetch a reproducer in [Link]. Fixes: 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs ... rules") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215407 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2022-05-10 20:31:24 +08:00
{
int fm_used = 0; // fastmap non anchor pebs.
if (!ubi->free.rb_node)
return 0;
ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool not empty There at least 6 PEBs reserved on UBI device: 1. EBA_RESERVED_PEBS[1] 2. WL_RESERVED_PEBS[1] 3. UBI_LAYOUT_VOLUME_EBS[2] 4. MIN_FASTMAP_RESERVED_PEBS[2] When all ubi volumes take all their PEBs, there are 3 (EBA_RESERVED_PEBS + WL_RESERVED_PEBS + MIN_FASTMAP_RESERVED_PEBS - MIN_FASTMAP_TAKEN_PEBS[1]) free PEBs. Since commit f9c34bb529975fe ("ubi: Fix producing anchor PEBs") and commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules") applied, there is only 1 (3 - FASTMAP_ANCHOR_PEBS[1] - FASTMAP_NEXT_ANCHOR_PEBS[1]) free PEB to fill pool and wl_pool, after filling pool, wl_pool is always empty. So, UBI could be stuck in an infinite loop: ubi_thread system_wq wear_leveling_worker <-------------------------------------------------- get_peb_for_wl | // fm_wl_pool, used = size = 0 | schedule_work(&ubi->fm_work) | | update_fastmap_work_fn | ubi_update_fastmap | ubi_refill_pools | // ubi->free_count - ubi->beb_rsvd_pebs < 5 | // wl_pool is not filled with any PEBs | schedule_erase(old_fm_anchor) | ubi_ensure_anchor_pebs | __schedule_ubi_work(wear_leveling_worker) | | __erase_worker | ensure_wear_leveling | __schedule_ubi_work(wear_leveling_worker) -------------------------- , which cause high cpu usage of ubi_bgt: top - 12:10:42 up 5 min, 2 users, load average: 1.76, 0.68, 0.27 Tasks: 123 total, 3 running, 54 sleeping, 0 stopped, 0 zombie PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 1589 root 20 0 0 0 0 R 45.0 0.0 0:38.86 ubi_bgt0d 319 root 20 0 0 0 0 I 15.2 0.0 0:15.29 kworker/0:3-eve 371 root 20 0 0 0 0 I 14.9 0.0 0:12.85 kworker/3:3-eve 20 root 20 0 0 0 0 I 11.3 0.0 0:05.33 kworker/1:0-eve 202 root 20 0 0 0 0 I 11.3 0.0 0:04.93 kworker/2:3-eve In commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules"), there are three key changes: 1) Choose the fastmap anchor when the most free PEBs are available. 
2) Enable anchor move within the anchor area again as it is useful for distributing wear. 3) Import a candidate fm anchor and check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, use the used anchor area PEB with the lowest erase count to replace it. The anchor candidate can be removed, we can check fm_anchor PEB's erase count during wear leveling. Fix it by: 1) Removing 'fm_next_anchor' and check 'fm_anchor' during wear leveling. 2) Preferentially filling one free peb into fm_wl_pool in condition of ubi->free_count > ubi->beb_rsvd_pebs, then try to reserve enough free count for fastmap non anchor pebs after the above prerequisites are met. Then, there are at least 1 PEB in pool and 1 PEB in wl_pool after calling ubi_refill_pools() with all erase works done. Fetch a reproducer in [Link]. Fixes: 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs ... rules") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215407 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2022-05-10 20:31:24 +08:00
if (!ubi->ro_mode && !ubi->fm_disabled)
ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool not empty There at least 6 PEBs reserved on UBI device: 1. EBA_RESERVED_PEBS[1] 2. WL_RESERVED_PEBS[1] 3. UBI_LAYOUT_VOLUME_EBS[2] 4. MIN_FASTMAP_RESERVED_PEBS[2] When all ubi volumes take all their PEBs, there are 3 (EBA_RESERVED_PEBS + WL_RESERVED_PEBS + MIN_FASTMAP_RESERVED_PEBS - MIN_FASTMAP_TAKEN_PEBS[1]) free PEBs. Since commit f9c34bb529975fe ("ubi: Fix producing anchor PEBs") and commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules") applied, there is only 1 (3 - FASTMAP_ANCHOR_PEBS[1] - FASTMAP_NEXT_ANCHOR_PEBS[1]) free PEB to fill pool and wl_pool, after filling pool, wl_pool is always empty. So, UBI could be stuck in an infinite loop: ubi_thread system_wq wear_leveling_worker <-------------------------------------------------- get_peb_for_wl | // fm_wl_pool, used = size = 0 | schedule_work(&ubi->fm_work) | | update_fastmap_work_fn | ubi_update_fastmap | ubi_refill_pools | // ubi->free_count - ubi->beb_rsvd_pebs < 5 | // wl_pool is not filled with any PEBs | schedule_erase(old_fm_anchor) | ubi_ensure_anchor_pebs | __schedule_ubi_work(wear_leveling_worker) | | __erase_worker | ensure_wear_leveling | __schedule_ubi_work(wear_leveling_worker) -------------------------- , which cause high cpu usage of ubi_bgt: top - 12:10:42 up 5 min, 2 users, load average: 1.76, 0.68, 0.27 Tasks: 123 total, 3 running, 54 sleeping, 0 stopped, 0 zombie PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 1589 root 20 0 0 0 0 R 45.0 0.0 0:38.86 ubi_bgt0d 319 root 20 0 0 0 0 I 15.2 0.0 0:15.29 kworker/0:3-eve 371 root 20 0 0 0 0 I 14.9 0.0 0:12.85 kworker/3:3-eve 20 root 20 0 0 0 0 I 11.3 0.0 0:05.33 kworker/1:0-eve 202 root 20 0 0 0 0 I 11.3 0.0 0:04.93 kworker/2:3-eve In commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules"), there are three key changes: 1) Choose the fastmap anchor when the most free PEBs are available. 
2) Enable anchor move within the anchor area again as it is useful for distributing wear. 3) Import a candidate fm anchor and check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, use the used anchor area PEB with the lowest erase count to replace it. The anchor candidate can be removed, we can check fm_anchor PEB's erase count during wear leveling. Fix it by: 1) Removing 'fm_next_anchor' and check 'fm_anchor' during wear leveling. 2) Preferentially filling one free peb into fm_wl_pool in condition of ubi->free_count > ubi->beb_rsvd_pebs, then try to reserve enough free count for fastmap non anchor pebs after the above prerequisites are met. Then, there are at least 1 PEB in pool and 1 PEB in wl_pool after calling ubi_refill_pools() with all erase works done. Fetch a reproducer in [Link]. Fixes: 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs ... rules") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215407 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2022-05-10 20:31:24 +08:00
fm_used = ubi->fm_size / ubi->leb_size - 1;
return ubi->free_count - fm_used;
}
/*
 * can_fill_pools - whether free PEBs will be left after filling pools
 * @ubi: UBI device description object
 * @free: current number of free PEBs
 *
 * The number of PEBs needed is the room left in both pools, i.e. the sum of
 * (max_size - size) for fm_pool and fm_wl_pool.
 *
 * Return %1 if at least one free PEB is still left after filling pools,
 * otherwise %0 is returned.
 */
static int can_fill_pools(struct ubi_device *ubi, int free)
{
	struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
	struct ubi_fm_pool *pool = &ubi->fm_pool;
	int pool_need = pool->max_size - pool->size +
			wl_pool->max_size - wl_pool->size;

	/* At least one PEB must remain once both pools are full. */
	return free - pool_need >= 1;
}
/**
* ubi_refill_pools_and_lock - refills all fastmap PEB pools and takes fm locks.
* @ubi: UBI device description object
*/
void ubi_refill_pools_and_lock(struct ubi_device *ubi)
{
struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
struct ubi_fm_pool *pool = &ubi->fm_pool;
struct ubi_wl_entry *e;
int enough;
if (!ubi->ro_mode && !ubi->fm_disabled)
wait_free_pebs_for_pool(ubi);
down_write(&ubi->fm_protect);
down_write(&ubi->work_sem);
down_write(&ubi->fm_eba_sem);
spin_lock(&ubi->wl_lock);
return_unused_pool_pebs(ubi, wl_pool);
return_unused_pool_pebs(ubi, pool);
wl_pool->size = 0;
pool->size = 0;
ubi: Select fastmap anchor PEBs considering wear level rules There is a risk that the fastmap anchor PEB is alternating between just two PEBs, the current anchor and the previous anchor that was just deleted. As the fastmap pools gets the first take on free PEBs, the pools may leave no free PEBs to be selected as the new anchor, resulting in the two PEBs alternating behaviour. If the anchor PEBs gets a high erase count the PEBs will not be used by the pools but remain in ubi->free, even more increasing the likelihood they will be used as anchors. Getting stuck using only a couple of PEBs continuously will result in an uneven wear, eventually leading to failure. To fix this: - Choose the fastmap anchor when the most free PEBs are available. This is during rebuilding of the fastmap pools, after the unused pool PEBs are added to ubi->free but before the pools are populated again from the free PEBs. Also reserve an additional second best PEB as a candidate for the next time the fast map anchor is updated. If a better PEB is found the next time the fast map anchor is updated, the candidate is made available for building the pools. - Enable anchor move within the anchor area again as it is useful for distributing wear. - The anchor candidate for the next fastmap update is the most suited free PEB. Check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, the PEB is considered unsuitable for now. As all other non used anchor area PEBs should be even worse, free up the used anchor area PEB with the lowest erase count. Signed-off-by: Arne Edholm <arne.edholm@axis.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2020-01-13 15:56:22 +01:00
if (ubi->fm_anchor) {
wl_tree_add(ubi->fm_anchor, &ubi->free);
ubi->free_count++;
ubi->fm_anchor = NULL;
ubi: Select fastmap anchor PEBs considering wear level rules There is a risk that the fastmap anchor PEB is alternating between just two PEBs, the current anchor and the previous anchor that was just deleted. As the fastmap pools gets the first take on free PEBs, the pools may leave no free PEBs to be selected as the new anchor, resulting in the two PEBs alternating behaviour. If the anchor PEBs gets a high erase count the PEBs will not be used by the pools but remain in ubi->free, even more increasing the likelihood they will be used as anchors. Getting stuck using only a couple of PEBs continuously will result in an uneven wear, eventually leading to failure. To fix this: - Choose the fastmap anchor when the most free PEBs are available. This is during rebuilding of the fastmap pools, after the unused pool PEBs are added to ubi->free but before the pools are populated again from the free PEBs. Also reserve an additional second best PEB as a candidate for the next time the fast map anchor is updated. If a better PEB is found the next time the fast map anchor is updated, the candidate is made available for building the pools. - Enable anchor move within the anchor area again as it is useful for distributing wear. - The anchor candidate for the next fastmap update is the most suited free PEB. Check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, the PEB is considered unsuitable for now. As all other non used anchor area PEBs should be even worse, free up the used anchor area PEB with the lowest erase count. Signed-off-by: Arne Edholm <arne.edholm@axis.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2020-01-13 15:56:22 +01:00
}
if (!ubi->fm_disabled)
/*
* All available PEBs are in ubi->free, now is the time to get
* the best anchor PEBs.
*/
ubi->fm_anchor = ubi_wl_get_fm_peb(ubi, 1);
ubi: Select fastmap anchor PEBs considering wear level rules There is a risk that the fastmap anchor PEB is alternating between just two PEBs, the current anchor and the previous anchor that was just deleted. As the fastmap pools gets the first take on free PEBs, the pools may leave no free PEBs to be selected as the new anchor, resulting in the two PEBs alternating behaviour. If the anchor PEBs gets a high erase count the PEBs will not be used by the pools but remain in ubi->free, even more increasing the likelihood they will be used as anchors. Getting stuck using only a couple of PEBs continuously will result in an uneven wear, eventually leading to failure. To fix this: - Choose the fastmap anchor when the most free PEBs are available. This is during rebuilding of the fastmap pools, after the unused pool PEBs are added to ubi->free but before the pools are populated again from the free PEBs. Also reserve an additional second best PEB as a candidate for the next time the fast map anchor is updated. If a better PEB is found the next time the fast map anchor is updated, the candidate is made available for building the pools. - Enable anchor move within the anchor area again as it is useful for distributing wear. - The anchor candidate for the next fastmap update is the most suited free PEB. Check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, the PEB is considered unsuitable for now. As all other non used anchor area PEBs should be even worse, free up the used anchor area PEB with the lowest erase count. Signed-off-by: Arne Edholm <arne.edholm@axis.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2020-01-13 15:56:22 +01:00
for (;;) {
enough = 0;
if (pool->size < pool->max_size) {
if (left_free_count(ubi) <= 0)
break;
e = wl_get_wle(ubi);
if (!e)
break;
pool->pebs[pool->size] = e->pnum;
pool->size++;
} else
enough++;
if (wl_pool->size < wl_pool->max_size) {
int left_free = left_free_count(ubi);
if (left_free <= 0)
break;
e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF,
!can_fill_pools(ubi, left_free));
self_check_in_wl_tree(ubi, e, &ubi->free);
rb_erase(&e->u.rb, &ubi->free);
ubi->free_count--;
wl_pool->pebs[wl_pool->size] = e->pnum;
wl_pool->size++;
} else
enough++;
if (enough == 2)
break;
}
wl_pool->used = 0;
pool->used = 0;
spin_unlock(&ubi->wl_lock);
}
/**
 * produce_free_peb - produce a free physical eraseblock.
 * @ubi: UBI device description object
 *
 * Runs pending works synchronously, one at a time, until the free tree is
 * non-empty or no works remain. This may be needed if, for example, the
 * background thread is disabled. Returns zero in case of success and the
 * error code of the failing work otherwise.
 */
static int produce_free_peb(struct ubi_device *ubi)
{
	for (;;) {
		int rc;

		/* Done once a free PEB exists or nothing is left to execute. */
		if (ubi->free.rb_node || !ubi->works_count)
			return 0;

		dbg_wl("do one work synchronously");
		rc = do_work(ubi, NULL);
		if (rc)
			return rc;
	}
}
/**
* ubi_wl_get_peb - get a physical eraseblock.
* @ubi: UBI device description object
*
* This function returns a physical eraseblock in case of success and a
* negative error code in case of failure.
* Returns with ubi->fm_eba_sem held in read mode!
*/
int ubi_wl_get_peb(struct ubi_device *ubi)
{
ubi: ubi_wl_get_peb: Increase the number of attempts while getting PEB Running stress test io_paral (A pressure ubi test in mtd-utils) on an UBI device with fewer PEBs (fastmap enabled) may cause ENOSPC errors and make UBI device read-only, but there are still free PEBs on the UBI device. This problem can be easily reproduced by performing the following steps on a 2-core machine: $ modprobe nandsim first_id_byte=0x20 second_id_byte=0x33 parts=80 $ modprobe ubi mtd="0,0" fm_autoconvert $ ./io_paral /dev/ubi0 We may see the following verbose: (output) [io_paral] update_volume():108: failed to write 380 bytes at offset 95920 of volume 2 [io_paral] update_volume():109: update: 97088 bytes [io_paral] write_thread():227: function pwrite() failed with error 28 (No space left on device) [io_paral] write_thread():229: cannot write 15872 bytes to offs 31744, wrote -1 (dmesg) ubi0 error: ubi_wl_get_peb [ubi]: Unable to get a free PEB from user WL pool ubi0 warning: ubi_eba_write_leb [ubi]: switch to read-only mode CPU: 0 PID: 2027 Comm: io_paral Not tainted 5.3.0-rc2-00001-g5986cd0 #9 ubi0 warning: try_write_vid_and_data [ubi]: failed to write VID header to LEB 2:5, PEB 18 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0 -0-ga698c8995f-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x85/0xba ubi_eba_write_leb+0xa1e/0xa40 [ubi] vol_cdev_write+0x307/0x520 [ubi] vfs_write+0xfa/0x280 ksys_pwrite64+0xc5/0xe0 __x64_sys_pwrite64+0x22/0x30 do_syscall_64+0xbf/0x440 In function ubi_wl_get_peb, the operation of filling the pool (ubi_update_fastmap) with free PEBs and fetching a free PEB from the pool is not atomic. After thread A filling the pool with free PEB, free PEB may be taken away by thread B. When thread A checks the expression again, the condition is still unsatisfactory. At this time, there may still be free PEBs on UBI that can be filled into the pool. This patch increases the number of attempts to obtain PEB. 
An extreme case (No free PEBs left after creating test volumes) has been tested on different type of machines for 100 times. The biggest number of attempts are shown below: x86_64 arm64 2-core 4 4 4-core 8 4 8-core 4 4 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2019-08-10 17:57:41 +08:00
int ret, attempts = 0;
struct ubi_fm_pool *pool = &ubi->fm_pool;
struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;
again:
down_read(&ubi->fm_eba_sem);
spin_lock(&ubi->wl_lock);
/* We check here also for the WL pool because at this point we can
* refill the WL pool synchronous. */
if (pool->used == pool->size || wl_pool->used == wl_pool->size) {
spin_unlock(&ubi->wl_lock);
up_read(&ubi->fm_eba_sem);
ret = ubi_update_fastmap(ubi);
if (ret) {
ubi_msg(ubi, "Unable to write a new fastmap: %i", ret);
down_read(&ubi->fm_eba_sem);
return -ENOSPC;
}
down_read(&ubi->fm_eba_sem);
spin_lock(&ubi->wl_lock);
}
if (pool->used == pool->size) {
spin_unlock(&ubi->wl_lock);
ubi: ubi_wl_get_peb: Increase the number of attempts while getting PEB Running stress test io_paral (A pressure ubi test in mtd-utils) on an UBI device with fewer PEBs (fastmap enabled) may cause ENOSPC errors and make UBI device read-only, but there are still free PEBs on the UBI device. This problem can be easily reproduced by performing the following steps on a 2-core machine: $ modprobe nandsim first_id_byte=0x20 second_id_byte=0x33 parts=80 $ modprobe ubi mtd="0,0" fm_autoconvert $ ./io_paral /dev/ubi0 We may see the following verbose: (output) [io_paral] update_volume():108: failed to write 380 bytes at offset 95920 of volume 2 [io_paral] update_volume():109: update: 97088 bytes [io_paral] write_thread():227: function pwrite() failed with error 28 (No space left on device) [io_paral] write_thread():229: cannot write 15872 bytes to offs 31744, wrote -1 (dmesg) ubi0 error: ubi_wl_get_peb [ubi]: Unable to get a free PEB from user WL pool ubi0 warning: ubi_eba_write_leb [ubi]: switch to read-only mode CPU: 0 PID: 2027 Comm: io_paral Not tainted 5.3.0-rc2-00001-g5986cd0 #9 ubi0 warning: try_write_vid_and_data [ubi]: failed to write VID header to LEB 2:5, PEB 18 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0 -0-ga698c8995f-prebuilt.qemu.org 04/01/2014 Call Trace: dump_stack+0x85/0xba ubi_eba_write_leb+0xa1e/0xa40 [ubi] vol_cdev_write+0x307/0x520 [ubi] vfs_write+0xfa/0x280 ksys_pwrite64+0xc5/0xe0 __x64_sys_pwrite64+0x22/0x30 do_syscall_64+0xbf/0x440 In function ubi_wl_get_peb, the operation of filling the pool (ubi_update_fastmap) with free PEBs and fetching a free PEB from the pool is not atomic. After thread A filling the pool with free PEB, free PEB may be taken away by thread B. When thread A checks the expression again, the condition is still unsatisfactory. At this time, there may still be free PEBs on UBI that can be filled into the pool. This patch increases the number of attempts to obtain PEB. 
An extreme case (No free PEBs left after creating test volumes) has been tested on different type of machines for 100 times. The biggest number of attempts are shown below: x86_64 arm64 2-core 4 4 4-core 8 4 8-core 4 4 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2019-08-10 17:57:41 +08:00
attempts++;
if (attempts == 10) {
ubi_err(ubi, "Unable to get a free PEB from user WL pool");
ret = -ENOSPC;
goto out;
}
up_read(&ubi->fm_eba_sem);
ret = produce_free_peb(ubi);
if (ret < 0) {
down_read(&ubi->fm_eba_sem);
goto out;
}
goto again;
}
ubi_assert(pool->used < pool->size);
ret = pool->pebs[pool->used++];
prot_queue_add(ubi, ubi->lookuptbl[ret]);
spin_unlock(&ubi->wl_lock);
out:
return ret;
}
/**
 * next_peb_for_wl - peek at the next PEB the WL sub-system would use.
 *
 * @ubi: UBI device description object
 * @need_fill: whether to fill wear-leveling pool when no PEBs are found
 *
 * Returns the entry for the next unused PEB of the wear-leveling pool without
 * consuming it, or NULL when the pool is exhausted.
 */
static struct ubi_wl_entry *next_peb_for_wl(struct ubi_device *ubi,
					    bool need_fill)
{
	struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;

	/* 'used' is left untouched: this is a look-ahead only. */
	if (wl_pool->used != wl_pool->size)
		return ubi->lookuptbl[wl_pool->pebs[wl_pool->used]];

	if (!need_fill || ubi->fm_work_scheduled)
		return NULL;

	/*
	 * We cannot update the fastmap here because this
	 * function is called in atomic context.
	 * Let's fail here and refill/update it as soon as
	 * possible.
	 */
	ubi->fm_work_scheduled = 1;
	schedule_work(&ubi->fm_work);

	return NULL;
}
/**
* need_wear_leveling - checks whether to trigger a wear leveling work.
* UBI fetches free PEB from wl_pool, we check free PEBs from both 'wl_pool'
* and 'ubi->free', because free PEB in 'ubi->free' tree maybe moved into
* 'wl_pool' by ubi_refill_pools().
*
* @ubi: UBI device description object
*/
static bool need_wear_leveling(struct ubi_device *ubi)
{
int ec;
struct ubi_wl_entry *e;
if (!ubi->used.rb_node)
return false;
e = next_peb_for_wl(ubi, false);
if (!e) {
if (!ubi->free.rb_node)
return false;
e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0);
ec = e->ec;
} else {
ec = e->ec;
if (ubi->free.rb_node) {
e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF, 0);
ec = max(ec, e->ec);
}
}
e = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
return ec - e->ec >= UBI_WL_THRESHOLD;
}
/* get_peb_for_wl - takes the next PEB out of the wear-leveling pool for
 * internal use by the WL sub-system.
 *
 * @ubi: UBI device description object
 *
 * Returns the entry of the consumed PEB, or NULL when the pool is exhausted.
 * In the latter case a fastmap work is scheduled unless one is already
 * pending.
 */
static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
{
	struct ubi_fm_pool *wl_pool = &ubi->fm_wl_pool;

	ubi_assert(rwsem_is_locked(&ubi->fm_eba_sem));

	if (wl_pool->used != wl_pool->size) {
		int taken = wl_pool->pebs[wl_pool->used];

		/* Unlike a mere peek, this consumes the pool slot. */
		wl_pool->used++;
		return ubi->lookuptbl[taken];
	}

	/* We cannot update the fastmap here because this
	 * function is called in atomic context.
	 * Let's fail here and refill/update it as soon as possible. */
	if (!ubi->fm_work_scheduled) {
		ubi->fm_work_scheduled = 1;
		schedule_work(&ubi->fm_work);
	}

	return NULL;
}
/**
* ubi_ensure_anchor_pebs - schedule wear-leveling to produce an anchor PEB.
* @ubi: UBI device description object
*/
int ubi_ensure_anchor_pebs(struct ubi_device *ubi)
{
struct ubi_work *wrk;
ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool not empty There at least 6 PEBs reserved on UBI device: 1. EBA_RESERVED_PEBS[1] 2. WL_RESERVED_PEBS[1] 3. UBI_LAYOUT_VOLUME_EBS[2] 4. MIN_FASTMAP_RESERVED_PEBS[2] When all ubi volumes take all their PEBs, there are 3 (EBA_RESERVED_PEBS + WL_RESERVED_PEBS + MIN_FASTMAP_RESERVED_PEBS - MIN_FASTMAP_TAKEN_PEBS[1]) free PEBs. Since commit f9c34bb529975fe ("ubi: Fix producing anchor PEBs") and commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules") applied, there is only 1 (3 - FASTMAP_ANCHOR_PEBS[1] - FASTMAP_NEXT_ANCHOR_PEBS[1]) free PEB to fill pool and wl_pool, after filling pool, wl_pool is always empty. So, UBI could be stuck in an infinite loop: ubi_thread system_wq wear_leveling_worker <-------------------------------------------------- get_peb_for_wl | // fm_wl_pool, used = size = 0 | schedule_work(&ubi->fm_work) | | update_fastmap_work_fn | ubi_update_fastmap | ubi_refill_pools | // ubi->free_count - ubi->beb_rsvd_pebs < 5 | // wl_pool is not filled with any PEBs | schedule_erase(old_fm_anchor) | ubi_ensure_anchor_pebs | __schedule_ubi_work(wear_leveling_worker) | | __erase_worker | ensure_wear_leveling | __schedule_ubi_work(wear_leveling_worker) -------------------------- , which cause high cpu usage of ubi_bgt: top - 12:10:42 up 5 min, 2 users, load average: 1.76, 0.68, 0.27 Tasks: 123 total, 3 running, 54 sleeping, 0 stopped, 0 zombie PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 1589 root 20 0 0 0 0 R 45.0 0.0 0:38.86 ubi_bgt0d 319 root 20 0 0 0 0 I 15.2 0.0 0:15.29 kworker/0:3-eve 371 root 20 0 0 0 0 I 14.9 0.0 0:12.85 kworker/3:3-eve 20 root 20 0 0 0 0 I 11.3 0.0 0:05.33 kworker/1:0-eve 202 root 20 0 0 0 0 I 11.3 0.0 0:04.93 kworker/2:3-eve In commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules"), there are three key changes: 1) Choose the fastmap anchor when the most free PEBs are available. 
2) Enable anchor move within the anchor area again as it is useful for distributing wear. 3) Import a candidate fm anchor and check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, use the used anchor area PEB with the lowest erase count to replace it. The anchor candidate can be removed, we can check fm_anchor PEB's erase count during wear leveling. Fix it by: 1) Removing 'fm_next_anchor' and check 'fm_anchor' during wear leveling. 2) Preferentially filling one free peb into fm_wl_pool in condition of ubi->free_count > ubi->beb_rsvd_pebs, then try to reserve enough free count for fastmap non anchor pebs after the above prerequisites are met. Then, there are at least 1 PEB in pool and 1 PEB in wl_pool after calling ubi_refill_pools() with all erase works done. Fetch a reproducer in [Link]. Fixes: 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs ... rules") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215407 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2022-05-10 20:31:24 +08:00
struct ubi_wl_entry *anchor;
spin_lock(&ubi->wl_lock);
ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool not empty There at least 6 PEBs reserved on UBI device: 1. EBA_RESERVED_PEBS[1] 2. WL_RESERVED_PEBS[1] 3. UBI_LAYOUT_VOLUME_EBS[2] 4. MIN_FASTMAP_RESERVED_PEBS[2] When all ubi volumes take all their PEBs, there are 3 (EBA_RESERVED_PEBS + WL_RESERVED_PEBS + MIN_FASTMAP_RESERVED_PEBS - MIN_FASTMAP_TAKEN_PEBS[1]) free PEBs. Since commit f9c34bb529975fe ("ubi: Fix producing anchor PEBs") and commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules") applied, there is only 1 (3 - FASTMAP_ANCHOR_PEBS[1] - FASTMAP_NEXT_ANCHOR_PEBS[1]) free PEB to fill pool and wl_pool, after filling pool, wl_pool is always empty. So, UBI could be stuck in an infinite loop: ubi_thread system_wq wear_leveling_worker <-------------------------------------------------- get_peb_for_wl | // fm_wl_pool, used = size = 0 | schedule_work(&ubi->fm_work) | | update_fastmap_work_fn | ubi_update_fastmap | ubi_refill_pools | // ubi->free_count - ubi->beb_rsvd_pebs < 5 | // wl_pool is not filled with any PEBs | schedule_erase(old_fm_anchor) | ubi_ensure_anchor_pebs | __schedule_ubi_work(wear_leveling_worker) | | __erase_worker | ensure_wear_leveling | __schedule_ubi_work(wear_leveling_worker) -------------------------- , which cause high cpu usage of ubi_bgt: top - 12:10:42 up 5 min, 2 users, load average: 1.76, 0.68, 0.27 Tasks: 123 total, 3 running, 54 sleeping, 0 stopped, 0 zombie PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 1589 root 20 0 0 0 0 R 45.0 0.0 0:38.86 ubi_bgt0d 319 root 20 0 0 0 0 I 15.2 0.0 0:15.29 kworker/0:3-eve 371 root 20 0 0 0 0 I 14.9 0.0 0:12.85 kworker/3:3-eve 20 root 20 0 0 0 0 I 11.3 0.0 0:05.33 kworker/1:0-eve 202 root 20 0 0 0 0 I 11.3 0.0 0:04.93 kworker/2:3-eve In commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules"), there are three key changes: 1) Choose the fastmap anchor when the most free PEBs are available. 
2) Enable anchor move within the anchor area again as it is useful for distributing wear. 3) Import a candidate fm anchor and check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, use the used anchor area PEB with the lowest erase count to replace it. The anchor candidate can be removed, we can check fm_anchor PEB's erase count during wear leveling. Fix it by: 1) Removing 'fm_next_anchor' and check 'fm_anchor' during wear leveling. 2) Preferentially filling one free peb into fm_wl_pool in condition of ubi->free_count > ubi->beb_rsvd_pebs, then try to reserve enough free count for fastmap non anchor pebs after the above prerequisites are met. Then, there are at least 1 PEB in pool and 1 PEB in wl_pool after calling ubi_refill_pools() with all erase works done. Fetch a reproducer in [Link]. Fixes: 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs ... rules") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215407 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2022-05-10 20:31:24 +08:00
/* Do we already have an anchor? */
if (ubi->fm_anchor) {
spin_unlock(&ubi->wl_lock);
return 0;
}
ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool not empty There at least 6 PEBs reserved on UBI device: 1. EBA_RESERVED_PEBS[1] 2. WL_RESERVED_PEBS[1] 3. UBI_LAYOUT_VOLUME_EBS[2] 4. MIN_FASTMAP_RESERVED_PEBS[2] When all ubi volumes take all their PEBs, there are 3 (EBA_RESERVED_PEBS + WL_RESERVED_PEBS + MIN_FASTMAP_RESERVED_PEBS - MIN_FASTMAP_TAKEN_PEBS[1]) free PEBs. Since commit f9c34bb529975fe ("ubi: Fix producing anchor PEBs") and commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules") applied, there is only 1 (3 - FASTMAP_ANCHOR_PEBS[1] - FASTMAP_NEXT_ANCHOR_PEBS[1]) free PEB to fill pool and wl_pool, after filling pool, wl_pool is always empty. So, UBI could be stuck in an infinite loop: ubi_thread system_wq wear_leveling_worker <-------------------------------------------------- get_peb_for_wl | // fm_wl_pool, used = size = 0 | schedule_work(&ubi->fm_work) | | update_fastmap_work_fn | ubi_update_fastmap | ubi_refill_pools | // ubi->free_count - ubi->beb_rsvd_pebs < 5 | // wl_pool is not filled with any PEBs | schedule_erase(old_fm_anchor) | ubi_ensure_anchor_pebs | __schedule_ubi_work(wear_leveling_worker) | | __erase_worker | ensure_wear_leveling | __schedule_ubi_work(wear_leveling_worker) -------------------------- , which cause high cpu usage of ubi_bgt: top - 12:10:42 up 5 min, 2 users, load average: 1.76, 0.68, 0.27 Tasks: 123 total, 3 running, 54 sleeping, 0 stopped, 0 zombie PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 1589 root 20 0 0 0 0 R 45.0 0.0 0:38.86 ubi_bgt0d 319 root 20 0 0 0 0 I 15.2 0.0 0:15.29 kworker/0:3-eve 371 root 20 0 0 0 0 I 14.9 0.0 0:12.85 kworker/3:3-eve 20 root 20 0 0 0 0 I 11.3 0.0 0:05.33 kworker/1:0-eve 202 root 20 0 0 0 0 I 11.3 0.0 0:04.93 kworker/2:3-eve In commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering wear level rules"), there are three key changes: 1) Choose the fastmap anchor when the most free PEBs are available. 
2) Enable anchor move within the anchor area again as it is useful for distributing wear. 3) Import a candidate fm anchor and check this PEB's erase count during wear leveling. If the wear leveling limit is exceeded, use the used anchor area PEB with the lowest erase count to replace it. The anchor candidate can be removed, we can check fm_anchor PEB's erase count during wear leveling. Fix it by: 1) Removing 'fm_next_anchor' and check 'fm_anchor' during wear leveling. 2) Preferentially filling one free peb into fm_wl_pool in condition of ubi->free_count > ubi->beb_rsvd_pebs, then try to reserve enough free count for fastmap non anchor pebs after the above prerequisites are met. Then, there are at least 1 PEB in pool and 1 PEB in wl_pool after calling ubi_refill_pools() with all erase works done. Fetch a reproducer in [Link]. Fixes: 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs ... rules") Link: https://bugzilla.kernel.org/show_bug.cgi?id=215407 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2022-05-10 20:31:24 +08:00
/* See if we can find an anchor PEB on the list of free PEBs */
anchor = ubi_wl_get_fm_peb(ubi, 1);
if (anchor) {
ubi->fm_anchor = anchor;
spin_unlock(&ubi->wl_lock);
return 0;
}
ubi->fm_do_produce_anchor = 1;
/* No luck, trigger wear leveling to produce a new anchor PEB. */
if (ubi->wl_scheduled) {
spin_unlock(&ubi->wl_lock);
return 0;
}
ubi->wl_scheduled = 1;
spin_unlock(&ubi->wl_lock);
wrk = kmalloc(sizeof(struct ubi_work), GFP_NOFS);
if (!wrk) {
spin_lock(&ubi->wl_lock);
ubi->wl_scheduled = 0;
spin_unlock(&ubi->wl_lock);
return -ENOMEM;
}
wrk->func = &wear_leveling_worker;
__schedule_ubi_work(ubi, wrk);
return 0;
}
/**
 * ubi_wl_put_fm_peb - hands a PEB used in a fastmap back to the wear-leveling
 * sub-system by scheduling it for erasure.
 * see: ubi_wl_put_peb()
 *
 * @ubi: UBI device description object
 * @fm_e: physical eraseblock to return
 * @lnum: the last used logical eraseblock number for the PEB
 * @torture: if this physical eraseblock has to be tortured
 *
 * Returns whatever schedule_erase() returns.
 */
int ubi_wl_put_fm_peb(struct ubi_device *ubi, struct ubi_wl_entry *fm_e,
		      int lnum, int torture)
{
	struct ubi_wl_entry *e;
	int pnum = fm_e->pnum;
	int vol_id;

	dbg_wl("PEB %d", pnum);

	ubi_assert(pnum >= 0);
	ubi_assert(pnum < ubi->peb_count);

	spin_lock(&ubi->wl_lock);
	e = ubi->lookuptbl[pnum];
	if (!e) {
		/*
		 * This can happen if we recovered from a fastmap the very
		 * first time and writing now a new one. In this case the wl
		 * system has never seen any PEB used by the original fastmap.
		 */
		e = fm_e;
		ubi_assert(e->ec >= 0);
		ubi->lookuptbl[pnum] = e;
	}
	spin_unlock(&ubi->wl_lock);

	/* lnum 0 maps to the superblock volume, any other lnum to data. */
	if (lnum)
		vol_id = UBI_FM_DATA_VOLUME_ID;
	else
		vol_id = UBI_FM_SB_VOLUME_ID;

	return schedule_erase(ubi, e, vol_id, lnum, torture, true);
}
/**
 * ubi_is_erase_work - checks whether a work is erase work.
 * @wrk: The work object to be checked
 *
 * Returns 1 if the work's handler is erase_worker() and 0 otherwise.
 */
int ubi_is_erase_work(struct ubi_work *wrk)
{
	if (wrk->func != erase_worker)
		return 0;

	return 1;
}
/*
 * ubi_fastmap_close - give back all PEBs still held for fastmap and free the
 * fastmap description.
 *
 * @ubi: UBI device description object
 *
 * Unused PEBs of both pools and the anchor PEB (if any) are returned to the
 * free tree; afterwards the entries of ubi->fm and ubi->fm itself are freed.
 */
static void ubi_fastmap_close(struct ubi_device *ubi)
{
	int blk;

	return_unused_pool_pebs(ubi, &ubi->fm_pool);
	return_unused_pool_pebs(ubi, &ubi->fm_wl_pool);

	if (ubi->fm_anchor) {
		return_unused_peb(ubi, ubi->fm_anchor);
		ubi->fm_anchor = NULL;
	}

	if (ubi->fm) {
		blk = 0;
		while (blk < ubi->fm->used_blocks) {
			kfree(ubi->fm->e[blk]);
			blk++;
		}
	}

	/* Called unconditionally, i.e. also when ubi->fm is NULL. */
	kfree(ubi->fm);
}
/**
* may_reserve_for_fm - tests whether a PEB shall be reserved for fastmap.
* See find_mean_wl_entry()
*
* @ubi: UBI device description object
* @e: physical eraseblock to return
* @root: RB tree to test against.
*/
static struct ubi_wl_entry *may_reserve_for_fm(struct ubi_device *ubi,
struct ubi_wl_entry *e,
struct rb_root *root) {
ubi: fastmap: may_reserve_for_fm: Don't reserve PEB if fm_anchor exists This is the part 1 to fix cyclically reusing single fastmap data PEBs. After running fsstress on UBIFS for a while, UBI (16384 blocks, fastmap takes 2 blocks) has an erase block(PEB: 8031) with big erase counter greater than any other pebs: ========================================================= from to count min avg max --------------------------------------------------------- 0 .. 9: 0 0 0 0 10 .. 99: 532 84 92 99 100 .. 999: 15787 100 147 229 1000 .. 9999: 64 4699 4765 4826 10000 .. 99999: 0 0 0 0 100000 .. inf: 1 272935 272935 272935 --------------------------------------------------------- Total : 16384 84 180 272935 Not like fm_anchor, there is no candidate PEBs for fastmap data area, so old fastmap data pebs will be reused after all free pebs are filled into pool/wl_pool: ubi_update_fastmap for (i = 1; i < new_fm->used_blocks; i++) erase_block(ubi, old_fm->e[i]->pnum) new_fm->e[i] = old_fm->e[i] According to wear leveling algorithm, UBI selects one small erase counter PEB from ubi->used and one big erase counter PEB from wl_pool, the reused fastmap data PEB is not in these trees. UBI won't schedule this PEB for wl even it is in ubi->used because wl algorithm expects small erase counter for used PEB. Don't reserve PEB for fastmap in may_reserve_for_fm() if fm_anchor already exists. Otherwise, when UBI is running out of free PEBs, the only one free PEB (pnum < 64) will be skipped and fastmap data will be written on the same old PEB. Fixes: dbb7d2a88d2a ("UBI: Add fastmap core") Link: https://bugzilla.kernel.org/show_bug.cgi?id=217787 Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com> Signed-off-by: Richard Weinberger <richard@nod.at>
2023-08-28 14:38:41 +08:00
if (e && !ubi->fm_disabled && !ubi->fm && !ubi->fm_anchor &&
e->pnum < UBI_FM_MAX_START)
e = rb_entry(rb_next(root->rb_node),
struct ubi_wl_entry, u.rb);
return e;
}