PM: Restrict swap use to later in the suspend sequence

Currently swap is restricted before drivers have had a chance to do
their prepare() PM callbacks. Restricting swap this early means that if
a driver needs to evict some content from memory into swap in its
prepare callback, it won't be able to.

On AMD dGPUs this can lead to failed suspends under memory pressure,
because all VRAM must be evicted to system memory or swap.

Move the swap restriction to right after all devices have had a chance
to do the prepare() callback.  If there is any problem with the sequence,
restore swap in the appropriate dpm resume callbacks or error handling
paths.

Closes: https://github.com/ROCm/ROCK-Kernel-Driver/issues/174
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/2362
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Tested-by: Nat Wittstock <nat@fardog.io>
Tested-by: Lucian Langa <lucilanga@7pot.org>
Link: https://patch.msgid.link/20250613214413.4127087-1-superm1@kernel.org
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
This commit is contained in:
Mario Limonciello 2025-06-13 16:43:44 -05:00 committed by Rafael J. Wysocki
parent 86731a2a65
commit 12ffc3b151
6 changed files with 11 additions and 11 deletions

View file

@ -1236,6 +1236,7 @@ void dpm_complete(pm_message_t state)
*/
void dpm_resume_end(pm_message_t state)
{
pm_restore_gfp_mask();
dpm_resume(state);
dpm_complete(state);
}
@ -2176,8 +2177,10 @@ int dpm_suspend_start(pm_message_t state)
error = dpm_prepare(state);
if (error)
dpm_save_failed_step(SUSPEND_PREPARE);
else
else {
pm_restrict_gfp_mask();
error = dpm_suspend(state);
}
dpm_show_time(starttime, state, error, "start");
return error;

View file

@ -446,6 +446,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb);
extern void ksys_sync_helper(void);
extern void pm_report_hw_sleep_time(u64 t);
extern void pm_report_max_hw_sleep(u64 t);
void pm_restrict_gfp_mask(void);
void pm_restore_gfp_mask(void);
#define pm_notifier(fn, pri) { \
static struct notifier_block fn##_nb = \
@ -492,6 +494,9 @@ static inline int unregister_pm_notifier(struct notifier_block *nb)
static inline void pm_report_hw_sleep_time(u64 t) {};
static inline void pm_report_max_hw_sleep(u64 t) {};
static inline void pm_restrict_gfp_mask(void) {}
static inline void pm_restore_gfp_mask(void) {}
static inline void ksys_sync_helper(void) {}
#define pm_notifier(fn, pri) do { (void)(fn); } while (0)

View file

@ -1136,6 +1136,7 @@ int kernel_kexec(void)
Resume_devices:
dpm_resume_end(PMSG_RESTORE);
Resume_console:
pm_restore_gfp_mask();
console_resume_all();
thaw_processes();
Restore_console:

View file

@ -423,7 +423,6 @@ int hibernation_snapshot(int platform_mode)
}
console_suspend_all();
pm_restrict_gfp_mask();
error = dpm_suspend(PMSG_FREEZE);
@ -559,7 +558,6 @@ int hibernation_restore(int platform_mode)
pm_prepare_console();
console_suspend_all();
pm_restrict_gfp_mask();
error = dpm_suspend_start(PMSG_QUIESCE);
if (!error) {
error = resume_target_kernel(platform_mode);
@ -571,7 +569,6 @@ int hibernation_restore(int platform_mode)
BUG_ON(!error);
}
dpm_resume_end(PMSG_RECOVER);
pm_restore_gfp_mask();
console_resume_all();
pm_restore_console();
return error;

View file

@ -239,11 +239,6 @@ static inline void suspend_test_finish(const char *label) {}
/* kernel/power/main.c */
extern int pm_notifier_call_chain_robust(unsigned long val_up, unsigned long val_down);
extern int pm_notifier_call_chain(unsigned long val);
void pm_restrict_gfp_mask(void);
void pm_restore_gfp_mask(void);
#else
static inline void pm_restrict_gfp_mask(void) {}
static inline void pm_restore_gfp_mask(void) {}
#endif
#ifdef CONFIG_HIGHMEM

View file

@ -540,6 +540,7 @@ int suspend_devices_and_enter(suspend_state_t state)
return error;
Recover_platform:
pm_restore_gfp_mask();
platform_recover(state);
goto Resume_devices;
}
@ -606,9 +607,7 @@ static int enter_state(suspend_state_t state)
trace_suspend_resume(TPS("suspend_enter"), state, false);
pm_pr_dbg("Suspending system (%s)\n", mem_sleep_labels[state]);
pm_restrict_gfp_mask();
error = suspend_devices_and_enter(state);
pm_restore_gfp_mask();
Finish:
events_check_enabled = false;