2019-03-23 12:24:21 +01:00
|
|
|
/* SPDX-License-Identifier: GPL-2.0 */
|
|
|
|
|
|
|
|
#ifndef __PIDFD_H
|
|
|
|
#define __PIDFD_H
|
|
|
|
|
|
|
|
#define _GNU_SOURCE
|
|
|
|
#include <errno.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <sched.h>
|
|
|
|
#include <signal.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <syscall.h>
|
2025-03-05 11:08:19 +01:00
|
|
|
#include <sys/ioctl.h>
|
2020-03-08 17:26:32 +01:00
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/wait.h>
|
2019-03-23 12:24:21 +01:00
|
|
|
|
2025-07-23 16:13:45 -07:00
|
|
|
/*
|
|
|
|
* Remove the userspace definitions of the following preprocessor symbols
|
|
|
|
* to avoid duplicate-definition warnings from the subsequent in-kernel
|
|
|
|
* definitions.
|
|
|
|
*/
|
|
|
|
#undef SCHED_NORMAL
|
|
|
|
#undef SCHED_FLAG_KEEP_ALL
|
|
|
|
#undef SCHED_FLAG_UTIL_CLAMP
|
|
|
|
|
2019-03-23 12:24:21 +01:00
|
|
|
#include "../kselftest.h"
|
2024-12-02 23:44:52 +01:00
|
|
|
#include "../clone3/clone3_selftests.h"
|
2019-03-23 12:24:21 +01:00
|
|
|
|
2025-06-24 10:29:14 +02:00
|
|
|
/*
 * Fallback value, used only when the headers included above have not
 * already defined FD_PIDFS_ROOT.
 */
#ifndef FD_PIDFS_ROOT
#define FD_PIDFS_ROOT -10002
#endif
|
|
|
|
|
2019-07-28 00:22:30 +02:00
|
|
|
/*
 * Fallbacks for waitid()/clone() constants; each one takes effect only
 * when the system headers have not already defined the symbol.
 */
#ifndef P_PIDFD
#define P_PIDFD 3
#endif

#ifndef CLONE_NEWTIME
#define CLONE_NEWTIME 0x00000080
#endif

#ifndef CLONE_PIDFD
#define CLONE_PIDFD 0x00001000
#endif
|
|
|
|
|
|
|
|
/*
 * Syscall-number fallbacks, used only when <syscall.h> does not already
 * provide the corresponding __NR_* constant.
 */
#ifndef __NR_pidfd_open
#define __NR_pidfd_open 434
#endif

#ifndef __NR_pidfd_send_signal
#define __NR_pidfd_send_signal 424
#endif

#ifndef __NR_clone3
#define __NR_clone3 435
#endif

#ifndef __NR_pidfd_getfd
#define __NR_pidfd_getfd 438
#endif
|
|
|
|
|
2020-09-02 12:21:30 +02:00
|
|
|
/* PIDFD_NONBLOCK aliases O_NONBLOCK unless a header already defined it. */
#ifndef PIDFD_NONBLOCK
#define PIDFD_NONBLOCK O_NONBLOCK
#endif
|
|
|
|
|
2025-02-05 13:54:56 +01:00
|
|
|
/*
 * Sentinel "self" pidfd values. PIDFD_SELF and PIDFD_SELF_PROCESS are
 * plain aliases of the thread and thread-group sentinels respectively.
 * Every definition is skipped if the symbol already exists.
 */
#ifndef PIDFD_SELF_THREAD
#define PIDFD_SELF_THREAD -10000 /* Current thread. */
#endif

#ifndef PIDFD_SELF_THREAD_GROUP
#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */
#endif

#ifndef PIDFD_SELF
#define PIDFD_SELF PIDFD_SELF_THREAD
#endif

#ifndef PIDFD_SELF_PROCESS
#define PIDFD_SELF_PROCESS PIDFD_SELF_THREAD_GROUP
#endif
|
|
|
|
|
2025-03-05 11:08:19 +01:00
|
|
|
/*
 * pidfs ioctl request numbers, all built on PIDFS_IOCTL_MAGIC. Each one
 * is defined only when the included headers have not supplied it.
 *
 * PIDFD_GET_INFO encodes the size of struct pidfd_info, so that type
 * must be visible wherever the macro is expanded.
 */
#ifndef PIDFS_IOCTL_MAGIC
#define PIDFS_IOCTL_MAGIC 0xFF
#endif

#ifndef PIDFD_GET_CGROUP_NAMESPACE
#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
#endif

#ifndef PIDFD_GET_IPC_NAMESPACE
#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
#endif

#ifndef PIDFD_GET_MNT_NAMESPACE
#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
#endif

#ifndef PIDFD_GET_NET_NAMESPACE
#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
#endif

#ifndef PIDFD_GET_PID_NAMESPACE
#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
#endif

#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
#endif

#ifndef PIDFD_GET_TIME_NAMESPACE
#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
#endif

#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
#endif

#ifndef PIDFD_GET_USER_NAMESPACE
#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
#endif

#ifndef PIDFD_GET_UTS_NAMESPACE
#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
#endif

#ifndef PIDFD_GET_INFO
#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
#endif
|
|
|
|
|
|
|
|
/*
 * Bit flags naming the pieces of information PIDFD_GET_INFO can report.
 * Defined here only when the included headers lack them.
 */
#ifndef PIDFD_INFO_PID
#define PIDFD_INFO_PID (1UL << 0) /* Always returned, even if not requested */
#endif

#ifndef PIDFD_INFO_CREDS
#define PIDFD_INFO_CREDS (1UL << 1) /* Always returned, even if not requested */
#endif

#ifndef PIDFD_INFO_CGROUPID
#define PIDFD_INFO_CGROUPID (1UL << 2) /* Always returned if available, even if not requested */
#endif

#ifndef PIDFD_INFO_EXIT
#define PIDFD_INFO_EXIT (1UL << 3) /* Only returned if requested (per the UAPI <linux/pidfd.h>) */
#endif

#ifndef PIDFD_INFO_COREDUMP
#define PIDFD_INFO_COREDUMP (1UL << 4)
#endif
|
|
|
|
|
|
|
|
/*
 * Coredump state bits (fallback definitions).
 * NOTE(review): presumably the values carried in
 * struct pidfd_info::coredump_mask -- confirm against the UAPI header.
 */
#ifndef PIDFD_COREDUMPED
#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */
#endif

#ifndef PIDFD_COREDUMP_SKIP
#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */
#endif

#ifndef PIDFD_COREDUMP_USER
#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */
#endif

#ifndef PIDFD_COREDUMP_ROOT
#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */
#endif
|
|
|
|
|
2025-03-05 11:08:25 +01:00
|
|
|
/* PIDFD_THREAD aliases O_EXCL unless a header already defined it. */
#ifndef PIDFD_THREAD
#define PIDFD_THREAD O_EXCL
#endif
|
|
|
|
|
2025-03-05 11:08:19 +01:00
|
|
|
/*
 * Argument structure for the PIDFD_GET_INFO ioctl (its size is encoded
 * in that ioctl number). All members are fixed-width kernel types, so
 * the layout has no implicit padding: two 64-bit words followed by
 * fourteen 32-bit words.
 */
struct pidfd_info {
	__u64 mask;		/* NOTE(review): presumably PIDFD_INFO_* bits */
	__u64 cgroupid;
	__u32 pid;
	__u32 tgid;
	__u32 ppid;
	__u32 ruid;
	__u32 rgid;
	__u32 euid;
	__u32 egid;
	__u32 suid;
	__u32 sgid;
	__u32 fsuid;
	__u32 fsgid;
	__s32 exit_code;
	__u32 coredump_mask;	/* NOTE(review): presumably PIDFD_COREDUMP* bits */
	__u32 __spare1;
};
|
|
|
|
|
2019-03-23 12:24:21 +01:00
|
|
|
/*
 * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c.
 * That means that, when the pid counter wraps around, any pid < 300 is
 * skipped. So we need to use a pid > 300 in order to test recycling.
 */
#define PID_RECYCLE 1000
|
|
|
|
|
|
|
|
/*
 * Custom exit codes for the child process, so the parent can clearly
 * tell what happened: a system error, a test error, a skip, and so on.
 */
#define PIDFD_PASS 0
#define PIDFD_FAIL 1
#define PIDFD_ERROR 2
#define PIDFD_SKIP 3
#define PIDFD_XFAIL 4
|
|
|
|
|
2024-12-02 23:44:52 +01:00
|
|
|
/*
 * Invoke the raw waitid system call through syscall().
 *
 * The four parameters are forwarded unchanged; the raw call's trailing
 * fifth argument is always passed as NULL. Returns the value of
 * syscall(), narrowed to int.
 */
static inline int sys_waitid(int which, pid_t pid, siginfo_t *info, int options)
{
	return syscall(__NR_waitid, which, pid, info, options, NULL);
}
|
|
|
|
|
pidfd: fix test failure due to stack overflow on some arches
When running the pidfd_fdinfo_test on arm64, it fails for me. After some
digging, the reason is that the child exits due to SIGBUS, because it
overflows the 1024 byte stack we've reserved for it.
To fix the issue, increase the stack size to 8192 bytes (this number is
somewhat arbitrary, and was arrived at through experimentation -- I kept
doubling until the failure no longer occurred).
Also, let's make the issue easier to debug. wait_for_pid() returns an
ambiguous value: it may return -1 in all of these cases:
1. waitpid() itself returned -1
2. waitpid() returned success, but we found !WIFEXITED(status).
3. The child process exited, but it did so with a -1 exit code.
There's no way for the caller to tell the difference. So, at least log
which occurred, so the test runner can debug things.
While debugging this, I found that we had !WIFEXITED(), because the
child exited due to a signal. This seems like a reasonably common case,
so also print out whether or not we have WIFSIGNALED(), and the
associated WTERMSIG() (if any). This lets us see the SIGBUS I'm fixing
clearly when it occurs.
Finally, I'm suspicious of allocating the child's stack on our stack.
man clone(2) suggests that the correct way to do this is with mmap(),
and in particular by setting MAP_STACK. So, switch to doing it that way
instead.
Signed-off-by: Axel Rasmussen <axelrasmussen@google.com>
Acked-by: Christian Brauner <brauner@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
2022-01-27 13:29:51 -08:00
|
|
|
/*
 * Wait for child @pid to terminate and report how it ended.
 *
 * waitpid() is retried for as long as it fails with EINTR.
 *
 * Returns the child's exit status (WEXITSTATUS) when it exited normally.
 * Returns -1 when waitpid() fails for any other reason, or when the
 * child did not exit normally (for example, it was killed by a signal).
 * In both -1 cases a diagnostic line is logged via ksft_print_msg() so
 * the two can be told apart in the test output.
 */
static inline int wait_for_pid(pid_t pid)
{
	int status, ret;

	/* Restart the wait whenever it is merely interrupted. */
	do {
		ret = waitpid(pid, &status, 0);
	} while (ret == -1 && errno == EINTR);

	if (ret == -1) {
		ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
		return -1;
	}

	if (!WIFEXITED(status)) {
		/* Log the signal details: a crash is the usual cause here. */
		ksft_print_msg(
			"waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
			WIFSIGNALED(status), WTERMSIG(status));
		return -1;
	}

	return WEXITSTATUS(status);
}
|
|
|
|
|
2019-07-26 09:22:25 -07:00
|
|
|
/*
 * Invoke the raw pidfd_open system call through syscall().
 *
 * @pid and @flags are forwarded unchanged. Returns the value of
 * syscall(), narrowed to int.
 */
static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
{
	return syscall(__NR_pidfd_open, pid, flags);
}
|
|
|
|
|
2019-07-28 00:22:30 +02:00
|
|
|
/*
 * Invoke the raw pidfd_send_signal system call through syscall().
 *
 * All four arguments are forwarded unchanged. Returns the value of
 * syscall(), narrowed to int.
 */
static inline int sys_pidfd_send_signal(int pidfd, int sig, siginfo_t *info,
					unsigned int flags)
{
	return syscall(__NR_pidfd_send_signal, pidfd, sig, info, flags);
}
|
2019-03-23 12:24:21 +01:00
|
|
|
|
2020-01-07 09:59:27 -08:00
|
|
|
/*
 * Invoke the raw pidfd_getfd system call through syscall().
 *
 * @pidfd, @fd and @flags are forwarded unchanged. Returns the value of
 * syscall(), narrowed to int.
 */
static inline int sys_pidfd_getfd(int pidfd, int fd, int flags)
{
	return syscall(__NR_pidfd_getfd, pidfd, fd, flags);
}
|
|
|
|
|
2020-06-17 00:48:54 +02:00
|
|
|
/*
 * Invoke the raw memfd_create system call through syscall().
 *
 * @name and @flags are forwarded unchanged. Returns the value of
 * syscall(), narrowed to int.
 */
static inline int sys_memfd_create(const char *name, unsigned int flags)
{
	return syscall(__NR_memfd_create, name, flags);
}
|
|
|
|
|
2024-12-02 23:44:52 +01:00
|
|
|
static inline pid_t create_child(int *pidfd, unsigned flags)
|
|
|
|
{
|
|
|
|
struct __clone_args args = {
|
|
|
|
.flags = CLONE_PIDFD | flags,
|
|
|
|
.exit_signal = SIGCHLD,
|
|
|
|
.pidfd = ptr_to_u64(pidfd),
|
|
|
|
};
|
|
|
|
|
|
|
|
return sys_clone3(&args, sizeof(struct __clone_args));
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * read() that transparently restarts when interrupted by a signal.
 *
 * The call is repeated for as long as it fails with errno == EINTR.
 * Returns the result of the last read(): the byte count on success
 * (possibly short), or a negative value with errno set on any other
 * failure.
 */
static inline ssize_t read_nointr(int fd, void *buf, size_t count)
{
	ssize_t nread;

	for (;;) {
		nread = read(fd, buf, count);
		if (nread >= 0 || errno != EINTR)
			break;
	}

	return nread;
}
|
|
|
|
|
|
|
|
/*
 * write() that transparently restarts when interrupted by a signal.
 *
 * The call is repeated for as long as it fails with errno == EINTR.
 * Returns the result of the last write(): the number of bytes written
 * (possibly fewer than @count), or a negative value with errno set on
 * any other failure.
 */
static inline ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	ssize_t nwritten;

	for (;;) {
		nwritten = write(fd, buf, count);
		if (nwritten >= 0 || errno != EINTR)
			break;
	}

	return nwritten;
}
|
|
|
|
|
2025-03-05 11:08:26 +01:00
|
|
|
/*
 * Invoke the raw execveat system call through syscall().
 *
 * All five arguments are forwarded unchanged. Returns the value of
 * syscall(), narrowed to int.
 */
static inline int sys_execveat(int dirfd, const char *pathname,
			       char *const argv[], char *const envp[],
			       int flags)
{
	return syscall(__NR_execveat, dirfd, pathname, argv, envp, flags);
}
|
|
|
|
|
2019-03-23 12:24:21 +01:00
|
|
|
#endif /* __PIDFD_H */
|