linux/drivers/ptp/ptp_private.h
Vladimir Oltean 5ab73b010c ptp: fix breakage after ptp_vclock_in_use() rework
What is broken
--------------

ptp4l, and any other application which calls clock_adjtime() on a
physical clock, is greeted with error -EBUSY after commit 87f7ce260a
("ptp: remove ptp->n_vclocks check logic in ptp_vclock_in_use()").

Explanation for the breakage
----------------------------

The blamed commit was based on the false assumption that
ptp_vclock_in_use() callers already test for n_vclocks prior to calling
this function.

This is notably incorrect for the code path below, in which there is, in
fact, no n_vclocks test:

ptp_clock_adjtime()
-> ptp_clock_freerun()
   -> ptp_vclock_in_use()

The result is that any clock adjustment on any physical clock is now
impossible. This is _despite_ there not being any vclock over this
physical clock.

$ ptp4l -i eno0 -2 -P -m
ptp4l[58.425]: selected /dev/ptp0 as PTP clock
[   58.429749] ptp: physical clock is free running
ptp4l[58.431]: Failed to open /dev/ptp0: Device or resource busy
failed to create a clock
$ cat /sys/class/ptp/ptp0/n_vclocks
0

The patch makes the ptp_vclock_in_use() function say "if it's not a
virtual clock, then this physical clock does have virtual clocks on
top".

Then ptp_clock_freerun() uses this information to say "this physical
clock has virtual clocks on top, so it must stay free-running".

Then ptp_clock_adjtime() uses this information to say "well, if this
physical clock has to be free-running, I can't do it, return -EBUSY".

Simply put, ptp_vclock_in_use() cannot be simplified so as to remove the
test whether vclocks are in use.

What did the blamed commit intend to fix
----------------------------------------

The blamed commit presents a lockdep warning stating "possible recursive
locking detected", with the n_vclocks_store() and ptp_clock_unregister()
functions involved.

The recursive locking seems this:
n_vclocks_store()
-> mutex_lock_interruptible(&ptp->n_vclocks_mux) // 1
-> device_for_each_child_reverse(..., unregister_vclock)
   -> unregister_vclock()
      -> ptp_vclock_unregister()
         -> ptp_clock_unregister()
            -> ptp_vclock_in_use()
               -> mutex_lock_interruptible(&ptp->n_vclocks_mux) // 2

The issue can be triggered by creating and then deleting vclocks:
$ echo 2 > /sys/class/ptp/ptp0/n_vclocks
$ echo 0 > /sys/class/ptp/ptp0/n_vclocks

But note that in the original stack trace, the address of the first lock
is different from the address of the second lock. This is because at
step 1 marked above, &ptp->n_vclocks_mux is the lock of the parent
(physical) PTP clock, and at step 2, the lock is of the child (virtual)
PTP clock. They are different locks of different devices.

In this situation there is no real deadlock, the lockdep warning is
caused by the fact that the mutexes have the same lock class on both the
parent and the child. Functionally it is fine.

Proposed alternative solution
-----------------------------

We must reintroduce the body of ptp_vclock_in_use() mostly as it was
structured prior to the blamed commit, but avoid the lockdep warning.

Based on the fact that vclocks cannot be nested on top of one another
(ptp_is_attribute_visible() hides n_vclocks for virtual clocks), we
already know that ptp->n_vclocks is zero for a virtual clock. And
ptp->is_virtual_clock is a runtime invariant, established at
ptp_clock_register() time and never changed. There is no need to
serialize on any mutex in order to read ptp->is_virtual_clock, and we
take advantage of that by moving it outside the lock.

Thus, virtual clocks do not need to acquire &ptp->n_vclocks_mux at
all, and step 2 in the code walkthrough above can simply go away.
We can simply return false to the question "ptp_vclock_in_use(a virtual
clock)".

Other notes
-----------

Releasing &ptp->n_vclocks_mux before ptp_vclock_in_use() returns
execution seems racy, because the returned value can become stale as
soon as the function returns and before the return value is used (i.e.
n_vclocks_store() can run any time). The locking requirement should
somehow be transferred to the caller, to ensure a longer life time for
the returned value, but this seems out of scope for this severe bug fix.

Because we are also fixing up the logic from the original commit, there
is another Fixes: tag for that.

Fixes: 87f7ce260a ("ptp: remove ptp->n_vclocks check logic in ptp_vclock_in_use()")
Fixes: 73f37068d5 ("ptp: support ptp physical/virtual clocks conversion")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://patch.msgid.link/20250613174749.406826-2-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
2025-06-17 16:13:09 -07:00

167 lines
4.7 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* PTP 1588 clock support - private declarations for the core module.
*
* Copyright (C) 2010 OMICRON electronics GmbH
*/
#ifndef _PTP_PRIVATE_H_
#define _PTP_PRIVATE_H_
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/posix-clock.h>
#include <linux/ptp_clock.h>
#include <linux/ptp_clock_kernel.h>
#include <linux/time.h>
#include <linux/list.h>
#include <linux/bitmap.h>
#include <linux/debugfs.h>
#define PTP_MAX_TIMESTAMPS 128
#define PTP_BUF_TIMESTAMPS 30
#define PTP_DEFAULT_MAX_VCLOCKS 20
#define PTP_MAX_CHANNELS 2048
struct timestamp_event_queue {
struct ptp_extts_event buf[PTP_MAX_TIMESTAMPS];
int head;
int tail;
spinlock_t lock;
struct list_head qlist;
unsigned long *mask;
struct dentry *debugfs_instance;
struct debugfs_u32_array dfs_bitmap;
};
struct ptp_clock {
struct posix_clock clock;
struct device dev;
struct ptp_clock_info *info;
dev_t devid;
int index; /* index into clocks.map */
struct pps_device *pps_source;
long dialed_frequency; /* remembers the frequency adjustment */
struct list_head tsevqs; /* timestamp fifo list */
spinlock_t tsevqs_lock; /* protects tsevqs from concurrent access */
struct mutex pincfg_mux; /* protect concurrent info->pin_config access */
wait_queue_head_t tsev_wq;
int defunct; /* tells readers to go away when clock is being removed */
struct device_attribute *pin_dev_attr;
struct attribute **pin_attr;
struct attribute_group pin_attr_group;
/* 1st entry is a pointer to the real group, 2nd is NULL terminator */
const struct attribute_group *pin_attr_groups[2];
struct kthread_worker *kworker;
struct kthread_delayed_work aux_work;
unsigned int max_vclocks;
unsigned int n_vclocks;
int *vclock_index;
struct mutex n_vclocks_mux; /* protect concurrent n_vclocks access */
bool is_virtual_clock;
bool has_cycles;
struct dentry *debugfs_root;
};
#define info_to_vclock(d) container_of((d), struct ptp_vclock, info)
#define cc_to_vclock(d) container_of((d), struct ptp_vclock, cc)
#define dw_to_vclock(d) container_of((d), struct ptp_vclock, refresh_work)
struct ptp_vclock {
struct ptp_clock *pclock;
struct ptp_clock_info info;
struct ptp_clock *clock;
struct hlist_node vclock_hash_node;
struct cyclecounter cc;
struct timecounter tc;
struct mutex lock; /* protects tc/cc */
};
/*
* The function queue_cnt() is safe for readers to call without
* holding q->lock. Readers use this function to verify that the queue
* is nonempty before proceeding with a dequeue operation. The fact
* that a writer might concurrently increment the tail does not
* matter, since the queue remains nonempty nonetheless.
*/
static inline int queue_cnt(const struct timestamp_event_queue *q)
{
/*
* Paired with WRITE_ONCE() in enqueue_external_timestamp(),
* ptp_read(), extts_fifo_show().
*/
int cnt = READ_ONCE(q->tail) - READ_ONCE(q->head);
return cnt < 0 ? PTP_MAX_TIMESTAMPS + cnt : cnt;
}
/* Check if ptp virtual clock is in use */
static inline bool ptp_vclock_in_use(struct ptp_clock *ptp)
{
bool in_use = false;
/* Virtual clocks can't be stacked on top of virtual clocks.
* Avoid acquiring the n_vclocks_mux on virtual clocks, to allow this
* function to be called from code paths where the n_vclocks_mux of the
* parent physical clock is already held. Functionally that's not an
* issue, but lockdep would complain, because they have the same lock
* class.
*/
if (ptp->is_virtual_clock)
return false;
if (mutex_lock_interruptible(&ptp->n_vclocks_mux))
return true;
if (ptp->n_vclocks)
in_use = true;
mutex_unlock(&ptp->n_vclocks_mux);
return in_use;
}
/* Check if ptp clock shall be free running */
static inline bool ptp_clock_freerun(struct ptp_clock *ptp)
{
if (ptp->has_cycles)
return false;
return ptp_vclock_in_use(ptp);
}
extern const struct class ptp_class;
/*
* see ptp_chardev.c
*/
/* caller must hold pincfg_mux */
int ptp_set_pinfunc(struct ptp_clock *ptp, unsigned int pin,
enum ptp_pin_function func, unsigned int chan);
long ptp_ioctl(struct posix_clock_context *pccontext, unsigned int cmd,
unsigned long arg);
int ptp_open(struct posix_clock_context *pccontext, fmode_t fmode);
int ptp_release(struct posix_clock_context *pccontext);
ssize_t ptp_read(struct posix_clock_context *pccontext, uint flags, char __user *buf,
size_t cnt);
__poll_t ptp_poll(struct posix_clock_context *pccontext, struct file *fp,
poll_table *wait);
/*
* see ptp_sysfs.c
*/
extern const struct attribute_group *ptp_groups[];
int ptp_populate_pin_groups(struct ptp_clock *ptp);
void ptp_cleanup_pin_groups(struct ptp_clock *ptp);
struct ptp_vclock *ptp_vclock_register(struct ptp_clock *pclock);
void ptp_vclock_unregister(struct ptp_vclock *vclock);
#endif