Miscellaneous perf events fixes:

 - Fix __free_event() corner case splat

 - Fix false-positive uprobes related lockdep splat on CONFIG_PREEMPT_RT=y kernels

 - Fix a complicated perf sigtrap race that may result in hangs

Signed-off-by: Ingo Molnar <mingo@kernel.org>
-----BEGIN PGP SIGNATURE-----
iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAmf4M0MRHG1pbmdvQGtl
cm5lbC5vcmcACgkQEnMQ0APhK1jAag//RwS5kSQJsEyzzTVqeEt7bpHlzwcNeJ2y
mu0bNYr1iS8/a+mMiW+XNKldWFT6Z9BvcPelrYsxRVePD7/qjSXwV0ln0+hXjqm6
aIMG/AQEIHSXaKq/hmMOpZqg+VTG9kum7nGNqRKpkc4CT9KPRJSvzx5tBf4Y6iP1
h7I1d/Z9mdYkMhNgjq3l/Le6yw6I9PCpXHbgLA3XHNCmWCGg4TF4pOU8ad+kBmur
QkMB2A4uRr/mhi4DCbde5gXvPig4GTSWWdyivwle7Llof56I4KrrLb4HbPMtdIkR
w+wHl+d5hqXjnC4Wh9IxYbGGPflQ4N4vzTJBRhomgtZ5tlaZKfbRQLsl4PGNkUq5
Lz6KZEDY2686brap20wkxQu5SNmyXCA/H/ryZswZfg+ooCmMpWIR8r11NCJgvc3l
+o2vXIIs++LmDuY5ZQ6MEshC7213I9cKp4MIO8KLMrfSWkgWXHKqZo6HI7q+kJgm
Hpz75Bm+PPFkWftq9zCUz7D/N/aLbh8kK0XArtmKNSkNAeIO/3IgGVfHfmzD0QZr
Fwu9sC5FYVyanvwHVbUFzrKlXmqaghYn7H4ShPEBNmWTpaEO38RssPzt4uozggj1
mjbTQ6rHN3r1UP5bF5TIiXVPcO7r0KcuYuKnZoAYUSHQmap5Bb3ghqx3eMdJOdmx
yaIaJNoYduw=
=V5bo
-----END PGP SIGNATURE-----

Merge tag 'perf-urgent-2025-04-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull misc perf events fixes from Ingo Molnar:

 - Fix __free_event() corner case splat

 - Fix false-positive uprobes related lockdep splat on CONFIG_PREEMPT_RT=y kernels

 - Fix a complicated perf sigtrap race that may result in hangs

* tag 'perf-urgent-2025-04-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf: Fix hang while freeing sigtrap event
  uprobes: Avoid false-positive lockdep splat on CONFIG_PREEMPT_RT=y in the ri_timer() uprobe timer callback, use raw_write_seqcount_*()
  perf/core: Fix WARN_ON(!ctx) in __free_event() for partial init
commit ac253a537d
3 changed files with 34 additions and 52 deletions
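The core of the sigtrap fix in the diff below is a switch from waiting on pending task work (the rcuwait on ->pending_work_wait, removed here) to plain reference counting: __perf_event_overflow() takes an extra reference on the event when it queues ->pending_task work, and perf_pending_task() drops it with put_event(), so whichever side finishes last frees the event and nothing has to block. As a rough illustration of that pattern only -- not kernel code, with made-up names (object, queue_deferred_work, deferred_work) -- here is a minimal user-space C sketch:

/* Minimal user-space model of the refcount pattern used by the fix:
 * the producer takes a reference when it queues deferred work, and the
 * work callback drops it, so whoever drops the last reference frees the
 * object -- nobody has to block waiting for the work to run.
 * All names here (object, queue_deferred_work, deferred_work) are
 * illustrative only.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct object {
	atomic_long refcount;
	int pending_work;
};

static void object_free(struct object *obj)
{
	printf("freeing object\n");
	free(obj);
}

static void object_put(struct object *obj)
{
	/* fetch_sub returns the old value; 1 means we dropped the last ref */
	if (atomic_fetch_sub(&obj->refcount, 1) == 1)
		object_free(obj);
}

/* Producer side: queue work and pin the object with an extra reference. */
static void queue_deferred_work(struct object *obj)
{
	obj->pending_work = 1;
	atomic_fetch_add(&obj->refcount, 1);	/* matches the put in deferred_work() */
}

/* Work side: do the job, then drop the reference taken at queue time. */
static void deferred_work(struct object *obj)
{
	if (obj->pending_work) {
		obj->pending_work = 0;
		printf("delivering deferred signal\n");
	}
	object_put(obj);
}

int main(void)
{
	struct object *obj = calloc(1, sizeof(*obj));

	atomic_init(&obj->refcount, 1);		/* creator's reference */
	queue_deferred_work(obj);

	object_put(obj);	/* creator goes away first: object survives ... */
	deferred_work(obj);	/* ... until the queued work drops the last ref */
	return 0;
}

The creator can drop its reference before or after the deferred work runs; the object simply lives until the last put, which is the property the actual fix relies on.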
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -823,7 +823,6 @@ struct perf_event {
 	struct irq_work			pending_disable_irq;
 	struct callback_head		pending_task;
 	unsigned int			pending_work;
-	struct rcuwait			pending_work_wait;
 
 	atomic_t			event_limit;
 
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5518,30 +5518,6 @@ static bool exclusive_event_installable(struct perf_event *event,
 
 static void perf_free_addr_filters(struct perf_event *event);
 
-static void perf_pending_task_sync(struct perf_event *event)
-{
-	struct callback_head *head = &event->pending_task;
-
-	if (!event->pending_work)
-		return;
-	/*
-	 * If the task is queued to the current task's queue, we
-	 * obviously can't wait for it to complete. Simply cancel it.
-	 */
-	if (task_work_cancel(current, head)) {
-		event->pending_work = 0;
-		local_dec(&event->ctx->nr_no_switch_fast);
-		return;
-	}
-
-	/*
-	 * All accesses related to the event are within the same RCU section in
-	 * perf_pending_task(). The RCU grace period before the event is freed
-	 * will make sure all those accesses are complete by then.
-	 */
-	rcuwait_wait_event(&event->pending_work_wait, !event->pending_work, TASK_UNINTERRUPTIBLE);
-}
-
 /* vs perf_event_alloc() error */
 static void __free_event(struct perf_event *event)
 {
@@ -5599,7 +5575,6 @@ static void _free_event(struct perf_event *event)
 {
 	irq_work_sync(&event->pending_irq);
 	irq_work_sync(&event->pending_disable_irq);
-	perf_pending_task_sync(event);
 
 	unaccount_event(event);
 
@@ -5692,10 +5667,17 @@ static void perf_remove_from_owner(struct perf_event *event)
 
 static void put_event(struct perf_event *event)
 {
+	struct perf_event *parent;
+
 	if (!atomic_long_dec_and_test(&event->refcount))
 		return;
 
+	parent = event->parent;
 	_free_event(event);
 
+	/* Matches the refcount bump in inherit_event() */
+	if (parent)
+		put_event(parent);
 }
 
 /*
@@ -5779,11 +5761,6 @@ again:
 		if (tmp == child) {
 			perf_remove_from_context(child, DETACH_GROUP);
 			list_move(&child->child_list, &free_list);
-			/*
-			 * This matches the refcount bump in inherit_event();
-			 * this can't be the last reference.
-			 */
-			put_event(event);
 		} else {
 			var = &ctx->refcount;
 		}
@@ -5809,7 +5786,8 @@ again:
 		void *var = &child->ctx->refcount;
 
 		list_del(&child->child_list);
-		free_event(child);
+		/* Last reference unless ->pending_task work is pending */
+		put_event(child);
 
 		/*
 		 * Wake any perf_event_free_task() waiting for this event to be
@@ -5820,7 +5798,11 @@ again:
 	}
 
 no_ctx:
-	put_event(event); /* Must be the 'last' reference */
+	/*
+	 * Last reference unless ->pending_task work is pending on this event
+	 * or any of its children.
+	 */
+	put_event(event);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(perf_event_release_kernel);
@@ -7235,12 +7217,6 @@ static void perf_pending_task(struct callback_head *head)
 	struct perf_event *event = container_of(head, struct perf_event, pending_task);
 	int rctx;
 
-	/*
-	 * All accesses to the event must belong to the same implicit RCU read-side
-	 * critical section as the ->pending_work reset. See comment in
-	 * perf_pending_task_sync().
-	 */
-	rcu_read_lock();
 	/*
 	 * If we 'fail' here, that's OK, it means recursion is already disabled
 	 * and we won't recurse 'further'.
@@ -7251,9 +7227,8 @@ static void perf_pending_task(struct callback_head *head)
 		event->pending_work = 0;
 		perf_sigtrap(event);
 		local_dec(&event->ctx->nr_no_switch_fast);
-		rcuwait_wake_up(&event->pending_work_wait);
 	}
-	rcu_read_unlock();
+	put_event(event);
 
 	if (rctx >= 0)
 		perf_swevent_put_recursion_context(rctx);
@@ -10248,6 +10223,7 @@ static int __perf_event_overflow(struct perf_event *event,
 		    !task_work_add(current, &event->pending_task, notify_mode)) {
 			event->pending_work = pending_id;
 			local_inc(&event->ctx->nr_no_switch_fast);
+			WARN_ON_ONCE(!atomic_long_inc_not_zero(&event->refcount));
 
 			event->pending_addr = 0;
 			if (valid_sample && (data->sample_flags & PERF_SAMPLE_ADDR))
@@ -12610,7 +12586,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	init_irq_work(&event->pending_irq, perf_pending_irq);
 	event->pending_disable_irq = IRQ_WORK_INIT_HARD(perf_pending_disable);
 	init_task_work(&event->pending_task, perf_pending_task);
-	rcuwait_init(&event->pending_work_wait);
 
 	mutex_init(&event->mmap_mutex);
 	raw_spin_lock_init(&event->addr_filters.lock);
@@ -13747,8 +13722,7 @@ perf_event_exit_event(struct perf_event *event, struct perf_event_context *ctx)
 		 * Kick perf_poll() for is_event_hup();
 		 */
 		perf_event_wakeup(parent_event);
-		free_event(event);
-		put_event(parent_event);
+		put_event(event);
 		return;
 	}
 
@@ -13872,13 +13846,11 @@ static void perf_free_event(struct perf_event *event,
 	list_del_init(&event->child_list);
 	mutex_unlock(&parent->child_mutex);
 
-	put_event(parent);
-
 	raw_spin_lock_irq(&ctx->lock);
 	perf_group_detach(event);
 	list_del_event(event, ctx);
 	raw_spin_unlock_irq(&ctx->lock);
-	free_event(event);
+	put_event(event);
 }
 
 /*
@@ -14016,6 +13988,9 @@ inherit_event(struct perf_event *parent_event,
 	if (IS_ERR(child_event))
 		return child_event;
 
+	get_ctx(child_ctx);
+	child_event->ctx = child_ctx;
+
 	pmu_ctx = find_get_pmu_context(child_event->pmu, child_ctx, child_event);
 	if (IS_ERR(pmu_ctx)) {
 		free_event(child_event);
@@ -14037,8 +14012,6 @@ inherit_event(struct perf_event *parent_event,
 		return NULL;
 	}
 
-	get_ctx(child_ctx);
-
 	/*
 	 * Make the child state follow the state of the parent event,
 	 * not its attr.disabled bit. We hold the parent's mutex,
@@ -14059,7 +14032,6 @@ inherit_event(struct perf_event *parent_event,
 		local64_set(&hwc->period_left, sample_period);
 	}
 
-	child_event->ctx = child_ctx;
 	child_event->overflow_handler = parent_event->overflow_handler;
 	child_event->overflow_handler_context
 		= parent_event->overflow_handler_context;
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1956,6 +1956,9 @@ static void free_ret_instance(struct uprobe_task *utask,
 	 * to-be-reused return instances for future uretprobes. If ri_timer()
 	 * happens to be running right now, though, we fallback to safety and
 	 * just perform RCU-delated freeing of ri.
+	 * Admittedly, this is a rather simple use of seqcount, but it nicely
+	 * abstracts away all the necessary memory barriers, so we use
+	 * a well-supported kernel primitive here.
 	 */
 	if (raw_seqcount_try_begin(&utask->ri_seqcount, seq)) {
 		/* immediate reuse of ri without RCU GP is OK */
@@ -2016,12 +2019,20 @@ static void ri_timer(struct timer_list *timer)
 	/* RCU protects return_instance from freeing. */
 	guard(rcu)();
 
-	write_seqcount_begin(&utask->ri_seqcount);
+	/*
+	 * See free_ret_instance() for notes on seqcount use.
+	 * We also employ raw API variants to avoid lockdep false-positive
+	 * warning complaining about enabled preemption. The timer can only be
+	 * invoked once for a uprobe_task. Therefore there can only be one
+	 * writer. The reader does not require an even sequence count to make
+	 * progress, so it is OK to remain preemptible on PREEMPT_RT.
+	 */
+	raw_write_seqcount_begin(&utask->ri_seqcount);
 
 	for_each_ret_instance_rcu(ri, utask->return_instances)
 		hprobe_expire(&ri->hprobe, false);
 
-	write_seqcount_end(&utask->ri_seqcount);
+	raw_write_seqcount_end(&utask->ri_seqcount);
 }
 
 static struct uprobe_task *alloc_utask(void)
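The uprobes hunk above swaps write_seqcount_begin()/end() for the raw_ variants purely to silence a lockdep false positive on CONFIG_PREEMPT_RT=y; the odd/even sequence-counter protocol itself is unchanged, and readers such as free_ret_instance() simply fall back to RCU-delayed freeing if they observe a writer. A toy user-space version of that protocol is sketched below; it is an illustration with invented names (seq_write_begin, seq_read) and simplified memory ordering, not the kernel's seqcount_t implementation:

/* Toy odd/even sequence counter, in the spirit of the kernel's seqcount_t:
 * the single writer bumps the counter to an odd value around its update,
 * and a reader gives up (falls back) if it saw an odd value or if the value
 * changed across its reads. Names and memory-ordering choices here are
 * illustrative only.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_uint seq;
static int shared_a, shared_b;	/* data protected by the sequence counter */

static void seq_write_begin(void)
{
	atomic_fetch_add_explicit(&seq, 1, memory_order_relaxed);	/* -> odd */
	atomic_thread_fence(memory_order_release);
}

static void seq_write_end(void)
{
	atomic_thread_fence(memory_order_release);
	atomic_fetch_add_explicit(&seq, 1, memory_order_relaxed);	/* -> even */
}

/* Returns false if a writer was (or may have been) active meanwhile. */
static bool seq_read(int *a, int *b)
{
	unsigned int start = atomic_load_explicit(&seq, memory_order_acquire);

	if (start & 1)
		return false;	/* writer in progress: caller takes the slow path */

	*a = shared_a;
	*b = shared_b;

	atomic_thread_fence(memory_order_acquire);
	return atomic_load_explicit(&seq, memory_order_relaxed) == start;
}

int main(void)
{
	int a, b;

	seq_write_begin();
	shared_a = 1;
	shared_b = 2;
	seq_write_end();

	if (seq_read(&a, &b))
		printf("consistent snapshot: a=%d b=%d\n", a, b);
	else
		printf("writer active, falling back\n");
	return 0;
}

Note that the reader never spins: like raw_seqcount_try_begin() in free_ret_instance(), it only reports whether its snapshot was stable and lets the caller choose the safe fallback, which is why the PREEMPT_RT writer may stay preemptible.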