mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-18 22:14:16 +00:00
Notable changes:
- remove obsolete network transports
- remove PCI IO port support
- start adding seccomp-based process handling instead of ptrace

-----BEGIN PGP SIGNATURE-----

iQIzBAABCgAdFiEEpeA8sTs3M8SN2hR410qiO8sPaAAFAmhBYTIACgkQ10qiO8sP
aACobQ//ZggBPinLNWXep4pcfK0/x1mx76cKVIpf1TSI6BpG1kQmkpOIxYDE6JTv
yo1Ydoy7CMs+xxkDRpsm85qcq8BHhK4Ebfg/jYmRSCSKxtWEeNHJv3RmauQGAxym
iGLR4Wd7dju0ywiOSAr66cZ0OYHKUbT2j4Vxybb8YG5sJ2s3YVJBYsiGJDtmjF9q
ezySizAhW8KLScSiqWDruHUq7yEGWa8fp2RNPKT5WhOobZRAJI5upFNwHh0dINaK
8Qntui4IgG922toBVS26g8ZwV6iJlBUsDttpWZEW1xFBxvxhWI5temW1LVBTvs8M
mTCiKRd/oGwgtzNmWwXPzW7oJbBA/IlYtGognmaPgjwomyeGmWbnIWsB/1VV1QL4
5+1+zGQzs8xnN2TsOkIQSiWEEkolreG8NFFY2PZPxiSH6lvkYvlin76DbA+HbmWR
oU8GBKAwJmn15yxPuRRaCtUaVr4M+siIfBVp5NCgvlnc6scCWVdGlT9e59D6T886
ZCY4O3UOzhzi9f0xCMx8+XVGjCPntlqLJJQCnSTrtS0+E7B78CxYNZRSLQ83HLa/
ivDA3fu/rvBON/gRYqd1YDOy0NkRddDZLQEwiedRkRSI5TZdEDQZMnOFdqSDEd/D
doWw8M3m6g5o2zTOF6XkU9Se1VhkkRDUgxQ+AqLCoMIoM3WVby8=
=iHzS
-----END PGP SIGNATURE-----

Merge tag 'uml-for-linux-6.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux

Pull UML updates from Johannes Berg:
 "The only really new thing is the long-standing seccomp work (originally
  from 2021!). Even if it still isn't enabled by default due to security
  concerns, it can still be used e.g. for tests.

   - remove obsolete network transports

   - remove PCI IO port support

   - start adding seccomp-based process handling instead of ptrace"

* tag 'uml-for-linux-6.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/uml/linux: (29 commits)
  um: remove "extern" from implementation of sigchld_handler
  um: fix unused variable warning
  um: fix SECCOMP 32bit xstate register restore
  um: pass FD for memory operations when needed
  um: Add SECCOMP support detection and initialization
  um: Implement kernel side of SECCOMP based process handling
  um: Track userspace children dying in SECCOMP mode
  um: Add helper functions to get/set state for SECCOMP
  um: Add stub side of SECCOMP/futex based process handling
  um: Move faultinfo extraction into userspace routine
  um: vector: Use mac_pton() for MAC address parsing
  um: vector: Clean up and modernize log messages
  um: chan_kern: use raw spinlock for irqs_to_free_lock
  MAINTAINERS: remove obsolete file entry in TUN/TAP DRIVER
  um: Fix tgkill compile error on old host OSes
  um: stop using PCI port I/O
  um: Remove legacy network transport infrastructure
  um: vector: Eliminate the dependency on uml_net
  um: Remove obsolete legacy network transports
  um/asm: Replace "REP; NOP" with PAUSE mnemonic
  ...
commit cfc4ca8986
80 changed files with 2683 additions and 4361 deletions
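The headline item in this pull is the long-standing seccomp work: driving guest processes through a seccomp filter plus futexes instead of ptrace. For background only, below is a minimal, self-contained sketch of installing a seccomp BPF filter, the generic kernel facility this mode builds on. It is not UML's stub code, and the trapped syscall and the SECCOMP_RET_TRAP policy are arbitrary choices for illustration.

/* Minimal seccomp-filter install: trap getpid(), allow everything else. */
#include <linux/filter.h>
#include <linux/seccomp.h>
#include <stddef.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

int main(void)
{
	struct sock_filter filter[] = {
		/* load the syscall number from seccomp_data */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		/* if it is getpid(), fall through to TRAP, else skip to ALLOW */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(filter) / sizeof(filter[0]),
		.filter = filter,
	};

	/* mandatory unless the caller has CAP_SYS_ADMIN */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return 1;
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
		return 1;

	printf("seccomp filter installed; getpid() now raises SIGSYS\n");
	return 0;
}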
@@ -147,18 +147,12 @@ The image hostname will be set to the same as the host on which you
are creating its image. It is a good idea to change that to avoid
"Oh, bummer, I rebooted the wrong machine".

UML supports two classes of network devices - the older uml_net ones
which are scheduled for obsoletion. These are called ethX. It also
supports the newer vector IO devices which are significantly faster
and have support for some standard virtual network encapsulations like
Ethernet over GRE and Ethernet over L2TPv3. These are called vec0.
UML supports vector I/O high performance network devices which have
support for some standard virtual network encapsulations like
Ethernet over GRE and Ethernet over L2TPv3. These are called vecX.

Depending on which one is in use, ``/etc/network/interfaces`` will
need entries like::

# legacy UML network devices
auto eth0
iface eth0 inet dhcp
When vector network devices are in use, ``/etc/network/interfaces``
will need entries like::

# vector UML network devices
auto vec0

@@ -219,16 +213,6 @@ remote UML and other VM instances.
+-----------+--------+------------------------------------+------------+
| vde       | vector | dep. on VDE VPN: Virt.Net Locator  | varies     |
+-----------+--------+------------------------------------+------------+
| tuntap    | legacy | none                               | ~ 500Mbit  |
+-----------+--------+------------------------------------+------------+
| daemon    | legacy | none                               | ~ 450Mbit  |
+-----------+--------+------------------------------------+------------+
| socket    | legacy | none                               | ~ 450Mbit  |
+-----------+--------+------------------------------------+------------+
| ethertap  | legacy | obsolete                           | ~ 500Mbit  |
+-----------+--------+------------------------------------+------------+
| vde       | legacy | obsolete                           | ~ 500Mbit  |
+-----------+--------+------------------------------------+------------+

* All transports which have tso and checksum offloads can deliver speeds
  approaching 10G on TCP streams.

@@ -236,27 +220,16 @@ remote UML and other VM instances.
* All transports which have multi-packet rx and/or tx can deliver pps
  rates of up to 1Mps or more.

* All legacy transports are generally limited to ~600-700MBit and 0.05Mps.

* GRE and L2TPv3 allow connections to all of: local machine, remote
  machines, remote network devices and remote UML instances.

* Socket allows connections only between UML instances.

* Daemon and bess require running a local switch. This switch may be
  connected to the host as well.


Network configuration privileges
================================

The majority of the supported networking modes need ``root`` privileges.
For example, in the legacy tuntap networking mode, users were required
to be part of the group associated with the tunnel device.

For newer network drivers like the vector transports, ``root`` privilege
is required to fire an ioctl to setup the tun interface and/or use
raw sockets where needed.
For example, for vector transports, ``root`` privilege is required to fire
an ioctl to setup the tun interface and/or use raw sockets where needed.
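The ioctl referred to here is TUNSETIFF on /dev/net/tun. As a rough illustration of why CAP_NET_ADMIN (or root) is needed at this step, a minimal userspace sketch of creating/attaching a tap interface follows; it is illustrative only and is not UML's tuntap/vector setup code.

/*
 * Sketch: the tun/tap setup ioctl mentioned above.  TUNSETIFF needs
 * CAP_NET_ADMIN unless the tap device was pre-created and handed to
 * the user.  Illustrative only.
 */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <unistd.h>

static int open_tap(const char *name)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;	/* raw ethernet frames, no packet-info header */
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);

	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {	/* fails with EPERM without CAP_NET_ADMIN */
		close(fd);
		return -1;
	}
	return fd;	/* read()/write() on this fd now carry frames for "name" */
}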

This can be achieved by granting the user a particular capability instead
of running UML as root. In case of vector transport, a user can add the

@@ -610,12 +583,6 @@ connect to a local area cloud (all the UML nodes using the same
multicast address running on hosts in the same multicast domain (LAN)
will be automagically connected together to a virtual LAN.

Configuring Legacy transports
=============================

Legacy transports are now considered obsolete. Please use the vector
versions.

***********
Running UML
***********

@@ -25156,13 +25156,12 @@ L: linux-parisc@vger.kernel.org
S: Orphan
F: drivers/net/ethernet/dec/tulip/

TUN/TAP driver
TUN/TAP DRIVER
M: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
M: Jason Wang <jasowang@redhat.com>
S: Maintained
W: http://vtun.sourceforge.net/tun
F: Documentation/networking/tuntap.rst
F: arch/um/os-Linux/drivers/
F: drivers/net/tap.c
F: drivers/net/tun*

@@ -52,13 +52,7 @@ config NO_IOMEM
config UML_IOMEM_EMULATION
	bool
	select INDIRECT_IOMEM
	select HAS_IOPORT
	select GENERIC_PCI_IOMAP
	select GENERIC_IOMAP
	select NO_GENERIC_PCI_IOPORT_MAP

config NO_IOPORT_MAP
	def_bool !UML_IOMEM_EMULATION

config ISA
	bool

@@ -52,13 +52,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
CONFIG_UML_NET=y
CONFIG_UML_NET_ETHERTAP=y
CONFIG_UML_NET_TUNTAP=y
CONFIG_UML_NET_SLIP=y
CONFIG_UML_NET_DAEMON=y
CONFIG_UML_NET_MCAST=y
CONFIG_UML_NET_SLIRP=y
CONFIG_EXT4_FS=y
CONFIG_QUOTA=y
CONFIG_AUTOFS_FS=m

@@ -51,13 +51,6 @@ CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_INET=y
# CONFIG_IPV6 is not set
CONFIG_UML_NET=y
CONFIG_UML_NET_ETHERTAP=y
CONFIG_UML_NET_TUNTAP=y
CONFIG_UML_NET_SLIP=y
CONFIG_UML_NET_DAEMON=y
CONFIG_UML_NET_MCAST=y
CONFIG_UML_NET_SLIRP=y
CONFIG_EXT4_FS=y
CONFIG_QUOTA=y
CONFIG_AUTOFS_FS=m

@@ -124,206 +124,18 @@ endmenu
|
|||
menu "UML Network Devices"
|
||||
depends on NET
|
||||
|
||||
# UML virtual driver
|
||||
config UML_NET
|
||||
bool "Virtual network device"
|
||||
help
|
||||
While the User-Mode port cannot directly talk to any physical
|
||||
hardware devices, this choice and the following transport options
|
||||
provide one or more virtual network devices through which the UML
|
||||
kernels can talk to each other, the host, and with the host's help,
|
||||
machines on the outside world.
|
||||
|
||||
For more information, including explanations of the networking and
|
||||
sample configurations, see
|
||||
<http://user-mode-linux.sourceforge.net/old/networking.html>.
|
||||
|
||||
If you'd like to be able to enable networking in the User-Mode
|
||||
linux environment, say Y; otherwise say N. Note that you must
|
||||
enable at least one of the following transport options to actually
|
||||
make use of UML networking.
|
||||
|
||||
config UML_NET_ETHERTAP
|
||||
bool "Ethertap transport (obsolete)"
|
||||
depends on UML_NET
|
||||
help
|
||||
The Ethertap User-Mode Linux network transport allows a single
|
||||
running UML to exchange packets with its host over one of the
|
||||
host's Ethertap devices, such as /dev/tap0. Additional running
|
||||
UMLs can use additional Ethertap devices, one per running UML.
|
||||
While the UML believes it's on a (multi-device, broadcast) virtual
|
||||
Ethernet network, it's in fact communicating over a point-to-point
|
||||
link with the host.
|
||||
|
||||
To use this, your host kernel must have support for Ethertap
|
||||
devices. Also, if your host kernel is 2.4.x, it must have
|
||||
CONFIG_NETLINK_DEV configured as Y or M.
|
||||
|
||||
For more information, see
|
||||
<http://user-mode-linux.sourceforge.net/old/networking.html> That site
|
||||
has examples of the UML command line to use to enable Ethertap
|
||||
networking.
|
||||
|
||||
NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
|
||||
migrate to UML_NET_VECTOR.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config UML_NET_TUNTAP
|
||||
bool "TUN/TAP transport (obsolete)"
|
||||
depends on UML_NET
|
||||
help
|
||||
The UML TUN/TAP network transport allows a UML instance to exchange
|
||||
packets with the host over a TUN/TAP device. This option will only
|
||||
work with a 2.4 host, unless you've applied the TUN/TAP patch to
|
||||
your 2.2 host kernel.
|
||||
|
||||
To use this transport, your host kernel must have support for TUN/TAP
|
||||
devices, either built-in or as a module.
|
||||
|
||||
NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
|
||||
migrate to UML_NET_VECTOR.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config UML_NET_SLIP
|
||||
bool "SLIP transport (obsolete)"
|
||||
depends on UML_NET
|
||||
help
|
||||
The slip User-Mode Linux network transport allows a running UML to
|
||||
network with its host over a point-to-point link. Unlike Ethertap,
|
||||
which can carry any Ethernet frame (and hence even non-IP packets),
|
||||
the slip transport can only carry IP packets.
|
||||
|
||||
To use this, your host must support slip devices.
|
||||
|
||||
For more information, see
|
||||
<http://user-mode-linux.sourceforge.net/old/networking.html>.
|
||||
has examples of the UML command line to use to enable slip
|
||||
networking, and details of a few quirks with it.
|
||||
|
||||
NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
|
||||
migrate to UML_NET_VECTOR.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config UML_NET_DAEMON
|
||||
bool "Daemon transport (obsolete)"
|
||||
depends on UML_NET
|
||||
help
|
||||
This User-Mode Linux network transport allows one or more running
|
||||
UMLs on a single host to communicate with each other, but not to
|
||||
the host.
|
||||
|
||||
To use this form of networking, you'll need to run the UML
|
||||
networking daemon on the host.
|
||||
|
||||
For more information, see
|
||||
<http://user-mode-linux.sourceforge.net/old/networking.html> That site
|
||||
has examples of the UML command line to use to enable Daemon
|
||||
networking.
|
||||
|
||||
NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
|
||||
migrate to UML_NET_VECTOR.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config UML_NET_DAEMON_DEFAULT_SOCK
|
||||
string "Default socket for daemon transport"
|
||||
default "/tmp/uml.ctl"
|
||||
depends on UML_NET_DAEMON
|
||||
help
|
||||
This option allows setting the default socket for the daemon
|
||||
transport, normally it defaults to /tmp/uml.ctl.
|
||||
|
||||
config UML_NET_VECTOR
|
||||
bool "Vector I/O high performance network devices"
|
||||
depends on UML_NET
|
||||
select MAY_HAVE_RUNTIME_DEPS
|
||||
help
|
||||
This User-Mode Linux network driver uses multi-message send
|
||||
and receive functions. The host running the UML guest must have
|
||||
a linux kernel version above 3.0 and a libc version > 2.13.
|
||||
This driver provides tap, raw, gre and l2tpv3 network transports
|
||||
with up to 4 times higher network throughput than the UML network
|
||||
drivers.
|
||||
This driver provides tap, raw, gre and l2tpv3 network transports.
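The help text above mentions multi-message send and receive functions. As an illustrative userspace sketch of that pattern (not the driver's actual vector_user.c code), a single recvmmsg() call can drain a burst of frames from a tap or raw-socket fd:

/* Illustrative only: batch-receive BURST frames in one syscall. */
#define _GNU_SOURCE
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

#define BURST 64
#define FRAME 1536

static int recv_burst(int fd, uint8_t bufs[BURST][FRAME])
{
	struct mmsghdr msgs[BURST];
	struct iovec iov[BURST];
	int i;

	memset(msgs, 0, sizeof(msgs));
	for (i = 0; i < BURST; i++) {
		iov[i].iov_base = bufs[i];
		iov[i].iov_len  = FRAME;
		msgs[i].msg_hdr.msg_iov    = &iov[i];
		msgs[i].msg_hdr.msg_iovlen = 1;
	}

	/* returns how many frames arrived; each msgs[i].msg_len holds its size */
	return recvmmsg(fd, msgs, BURST, MSG_DONTWAIT, NULL);
}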
|
||||
|
||||
config UML_NET_VDE
|
||||
bool "VDE transport (obsolete)"
|
||||
depends on UML_NET
|
||||
depends on !MODVERSIONS
|
||||
select MAY_HAVE_RUNTIME_DEPS
|
||||
help
|
||||
This User-Mode Linux network transport allows one or more running
|
||||
UMLs on a single host to communicate with each other and also
|
||||
with the rest of the world using Virtual Distributed Ethernet,
|
||||
an improved fork of uml_switch.
|
||||
|
||||
You must have libvdeplug installed in order to build the vde
|
||||
transport into UML.
|
||||
|
||||
To use this form of networking, you will need to run vde_switch
|
||||
on the host.
|
||||
|
||||
For more information, see <http://wiki.virtualsquare.org/>
|
||||
That site has a good overview of what VDE is and also examples
|
||||
of the UML command line to use to enable VDE networking.
|
||||
|
||||
NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
|
||||
migrate to UML_NET_VECTOR.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config UML_NET_MCAST
|
||||
bool "Multicast transport (obsolete)"
|
||||
depends on UML_NET
|
||||
help
|
||||
This Multicast User-Mode Linux network transport allows multiple
|
||||
UMLs (even ones running on different host machines!) to talk to
|
||||
each other over a virtual ethernet network. However, it requires
|
||||
at least one UML with one of the other transports to act as a
|
||||
bridge if any of them need to be able to talk to their hosts or any
|
||||
other IP machines.
|
||||
|
||||
To use this, your host kernel(s) must support IP Multicasting.
|
||||
|
||||
For more information, see
|
||||
<http://user-mode-linux.sourceforge.net/old/networking.html> That site
|
||||
has examples of the UML command line to use to enable Multicast
|
||||
networking, and notes about the security of this approach.
|
||||
|
||||
NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
|
||||
migrate to UML_NET_VECTOR.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
config UML_NET_SLIRP
|
||||
bool "SLiRP transport (obsolete)"
|
||||
depends on UML_NET
|
||||
help
|
||||
The SLiRP User-Mode Linux network transport allows a running UML
|
||||
to network by invoking a program that can handle SLIP encapsulated
|
||||
packets. This is commonly (but not limited to) the application
|
||||
known as SLiRP, a program that can re-socket IP packets back onto
|
||||
he host on which it is run. Only IP packets are supported,
|
||||
unlike other network transports that can handle all Ethernet
|
||||
frames. In general, slirp allows the UML the same IP connectivity
|
||||
to the outside world that the host user is permitted, and unlike
|
||||
other transports, SLiRP works without the need of root level
|
||||
privileges, setuid binaries, or SLIP devices on the host. This
|
||||
also means not every type of connection is possible, but most
|
||||
situations can be accommodated with carefully crafted slirp
|
||||
commands that can be passed along as part of the network device's
|
||||
setup string. The effect of this transport on the UML is similar
|
||||
that of a host behind a firewall that masquerades all network
|
||||
connections passing through it (but is less secure).
|
||||
|
||||
NOTE: THIS TRANSPORT IS DEPRECATED AND WILL BE REMOVED SOON!!! Please
|
||||
migrate to UML_NET_VECTOR.
|
||||
|
||||
If unsure, say N.
|
||||
|
||||
Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
|
||||
For more information, including explanations of the networking
|
||||
and sample configurations, see
|
||||
<file:Documentation/virt/uml/user_mode_linux_howto_v2.rst>.
|
||||
|
||||
endmenu
|
||||
|
||||
|
@@ -367,3 +179,11 @@ config UML_PCI_OVER_VIRTIO_DEVICE_ID
|
|||
There's no official device ID assigned (yet), set the one you
|
||||
wish to use for experimentation here. The default of -1 is
|
||||
not valid and will cause the driver to fail at probe.
|
||||
|
||||
config UML_PCI_OVER_VFIO
|
||||
bool "Enable VFIO-based PCI passthrough"
|
||||
select UML_PCI
|
||||
help
|
||||
This driver provides support for VFIO-based PCI passthrough.
|
||||
Currently, only MSI-X capable devices are supported, and it
|
||||
is assumed that drivers will use MSI-X.
|
||||
|
|
|
@@ -6,12 +6,7 @@
|
|||
# pcap is broken in 2.5 because kbuild doesn't allow pcap.a to be linked
|
||||
# in to pcap.o
|
||||
|
||||
slip-objs := slip_kern.o slip_user.o
|
||||
slirp-objs := slirp_kern.o slirp_user.o
|
||||
daemon-objs := daemon_kern.o daemon_user.o
|
||||
vector-objs := vector_kern.o vector_user.o vector_transports.o
|
||||
umcast-objs := umcast_kern.o umcast_user.o
|
||||
net-objs := net_kern.o net_user.o
|
||||
mconsole-objs := mconsole_kern.o mconsole_user.o
|
||||
hostaudio-objs := hostaudio_kern.o
|
||||
ubd-objs := ubd_kern.o ubd_user.o
|
||||
|
@@ -19,13 +14,7 @@ port-objs := port_kern.o port_user.o
|
|||
harddog-objs := harddog_kern.o
|
||||
harddog-builtin-$(CONFIG_UML_WATCHDOG) := harddog_user.o harddog_user_exp.o
|
||||
rtc-objs := rtc_kern.o rtc_user.o
|
||||
|
||||
LDFLAGS_vde.o = $(shell $(CC) $(CFLAGS) -print-file-name=libvdeplug.a)
|
||||
|
||||
targets := vde_kern.o vde_user.o
|
||||
|
||||
$(obj)/vde.o: $(obj)/vde_kern.o $(obj)/vde_user.o
|
||||
$(LD) -r -dp -o $@ $^ $(ld_flags)
|
||||
vfio_uml-objs := vfio_kern.o vfio_user.o
|
||||
|
||||
#XXX: The call below does not work because the flags are added before the
|
||||
# object name, so nothing from the library gets linked.
|
||||
|
@@ -38,13 +27,7 @@ obj-y := stdio_console.o fd.o chan_kern.o chan_user.o line.o
|
|||
obj-$(CONFIG_SSL) += ssl.o
|
||||
obj-$(CONFIG_STDERR_CONSOLE) += stderr_console.o
|
||||
|
||||
obj-$(CONFIG_UML_NET_SLIP) += slip.o slip_common.o
|
||||
obj-$(CONFIG_UML_NET_SLIRP) += slirp.o slip_common.o
|
||||
obj-$(CONFIG_UML_NET_DAEMON) += daemon.o
|
||||
obj-$(CONFIG_UML_NET_VECTOR) += vector.o
|
||||
obj-$(CONFIG_UML_NET_VDE) += vde.o
|
||||
obj-$(CONFIG_UML_NET_MCAST) += umcast.o
|
||||
obj-$(CONFIG_UML_NET) += net.o
|
||||
obj-$(CONFIG_MCONSOLE) += mconsole.o
|
||||
obj-$(CONFIG_MMAPPER) += mmapper_kern.o
|
||||
obj-$(CONFIG_BLK_DEV_UBD) += ubd.o
|
||||
|
@@ -62,9 +45,10 @@ obj-$(CONFIG_VIRTIO_UML) += virtio_uml.o
|
|||
obj-$(CONFIG_UML_RTC) += rtc.o
|
||||
obj-$(CONFIG_UML_PCI) += virt-pci.o
|
||||
obj-$(CONFIG_UML_PCI_OVER_VIRTIO) += virtio_pcidev.o
|
||||
obj-$(CONFIG_UML_PCI_OVER_VFIO) += vfio_uml.o
|
||||
|
||||
# pcap_user.o must be added explicitly.
|
||||
USER_OBJS := fd.o null.o pty.o tty.o xterm.o slip_common.o vde_user.o vector_user.o
|
||||
USER_OBJS := fd.o null.o pty.o tty.o xterm.o vector_user.o
|
||||
CFLAGS_null.o = -DDEV_NULL=$(DEV_NULL_PATH)
|
||||
|
||||
CFLAGS_xterm.o += '-DCONFIG_XTERM_CHAN_DEFAULT_EMULATOR="$(CONFIG_XTERM_CHAN_DEFAULT_EMULATOR)"'
|
||||
|
|
|
@@ -212,7 +212,7 @@ int enable_chan(struct line *line)
|
|||
* be permanently disabled. This is discovered in IRQ context, but
|
||||
* the freeing of the IRQ must be done later.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(irqs_to_free_lock);
|
||||
static DEFINE_RAW_SPINLOCK(irqs_to_free_lock);
|
||||
static LIST_HEAD(irqs_to_free);
|
||||
|
||||
void free_irqs(void)
|
||||
|
@@ -222,9 +222,9 @@ void free_irqs(void)
|
|||
struct list_head *ele;
|
||||
unsigned long flags;
|
||||
|
||||
spin_lock_irqsave(&irqs_to_free_lock, flags);
|
||||
raw_spin_lock_irqsave(&irqs_to_free_lock, flags);
|
||||
list_splice_init(&irqs_to_free, &list);
|
||||
spin_unlock_irqrestore(&irqs_to_free_lock, flags);
|
||||
raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags);
|
||||
|
||||
list_for_each(ele, &list) {
|
||||
chan = list_entry(ele, struct chan, free_list);
|
||||
|
@@ -246,9 +246,9 @@ static void close_one_chan(struct chan *chan, int delay_free_irq)
|
|||
return;
|
||||
|
||||
if (delay_free_irq) {
|
||||
spin_lock_irqsave(&irqs_to_free_lock, flags);
|
||||
raw_spin_lock_irqsave(&irqs_to_free_lock, flags);
|
||||
list_add(&chan->free_list, &irqs_to_free);
|
||||
spin_unlock_irqrestore(&irqs_to_free_lock, flags);
|
||||
raw_spin_unlock_irqrestore(&irqs_to_free_lock, flags);
|
||||
} else {
|
||||
if (chan->input && chan->enabled)
|
||||
um_free_irq(chan->line->read_irq, chan);
|
||||
|
|
|
@@ -1,29 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#ifndef __DAEMON_H__
|
||||
#define __DAEMON_H__
|
||||
|
||||
#include <net_user.h>
|
||||
|
||||
#define SWITCH_VERSION 3
|
||||
|
||||
struct daemon_data {
|
||||
char *sock_type;
|
||||
char *ctl_sock;
|
||||
void *ctl_addr;
|
||||
void *data_addr;
|
||||
void *local_addr;
|
||||
int fd;
|
||||
int control;
|
||||
void *dev;
|
||||
};
|
||||
|
||||
extern const struct net_user_info daemon_user_info;
|
||||
|
||||
extern int daemon_user_write(int fd, void *buf, int len,
|
||||
struct daemon_data *pri);
|
||||
|
||||
#endif
|
|
@@ -1,95 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
|
||||
* James Leu (jleu@mindspring.net).
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
* Copyright (C) 2001 by various other people who didn't put their name here.
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <net_kern.h>
|
||||
#include "daemon.h"
|
||||
|
||||
struct daemon_init {
|
||||
char *sock_type;
|
||||
char *ctl_sock;
|
||||
};
|
||||
|
||||
static void daemon_init(struct net_device *dev, void *data)
|
||||
{
|
||||
struct uml_net_private *pri;
|
||||
struct daemon_data *dpri;
|
||||
struct daemon_init *init = data;
|
||||
|
||||
pri = netdev_priv(dev);
|
||||
dpri = (struct daemon_data *) pri->user;
|
||||
dpri->sock_type = init->sock_type;
|
||||
dpri->ctl_sock = init->ctl_sock;
|
||||
dpri->fd = -1;
|
||||
dpri->control = -1;
|
||||
dpri->dev = dev;
|
||||
/* We will free this pointer. If it contains crap we're burned. */
|
||||
dpri->ctl_addr = NULL;
|
||||
dpri->data_addr = NULL;
|
||||
dpri->local_addr = NULL;
|
||||
|
||||
printk("daemon backend (uml_switch version %d) - %s:%s",
|
||||
SWITCH_VERSION, dpri->sock_type, dpri->ctl_sock);
|
||||
printk("\n");
|
||||
}
|
||||
|
||||
static int daemon_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
return net_recvfrom(fd, skb_mac_header(skb),
|
||||
skb->dev->mtu + ETH_HEADER_OTHER);
|
||||
}
|
||||
|
||||
static int daemon_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
return daemon_user_write(fd, skb->data, skb->len,
|
||||
(struct daemon_data *) &lp->user);
|
||||
}
|
||||
|
||||
static const struct net_kern_info daemon_kern_info = {
|
||||
.init = daemon_init,
|
||||
.protocol = eth_protocol,
|
||||
.read = daemon_read,
|
||||
.write = daemon_write,
|
||||
};
|
||||
|
||||
static int daemon_setup(char *str, char **mac_out, void *data)
|
||||
{
|
||||
struct daemon_init *init = data;
|
||||
char *remain;
|
||||
|
||||
*init = ((struct daemon_init)
|
||||
{ .sock_type = "unix",
|
||||
.ctl_sock = CONFIG_UML_NET_DAEMON_DEFAULT_SOCK });
|
||||
|
||||
remain = split_if_spec(str, mac_out, &init->sock_type, &init->ctl_sock,
|
||||
NULL);
|
||||
if (remain != NULL)
|
||||
printk(KERN_WARNING "daemon_setup : Ignoring data socket "
|
||||
"specification\n");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct transport daemon_transport = {
|
||||
.list = LIST_HEAD_INIT(daemon_transport.list),
|
||||
.name = "daemon",
|
||||
.setup = daemon_setup,
|
||||
.user = &daemon_user_info,
|
||||
.kern = &daemon_kern_info,
|
||||
.private_size = sizeof(struct daemon_data),
|
||||
.setup_size = sizeof(struct daemon_init),
|
||||
};
|
||||
|
||||
static int register_daemon(void)
|
||||
{
|
||||
register_transport(&daemon_transport);
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(register_daemon);
|
|
@@ -1,194 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
* Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
|
||||
* James Leu (jleu@mindspring.net).
|
||||
* Copyright (C) 2001 by various other people who didn't put their name here.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/un.h>
|
||||
#include "daemon.h"
|
||||
#include <net_user.h>
|
||||
#include <os.h>
|
||||
#include <um_malloc.h>
|
||||
|
||||
enum request_type { REQ_NEW_CONTROL };
|
||||
|
||||
#define SWITCH_MAGIC 0xfeedface
|
||||
|
||||
struct request_v3 {
|
||||
uint32_t magic;
|
||||
uint32_t version;
|
||||
enum request_type type;
|
||||
struct sockaddr_un sock;
|
||||
};
|
||||
|
||||
static struct sockaddr_un *new_addr(void *name, int len)
|
||||
{
|
||||
struct sockaddr_un *sun;
|
||||
|
||||
sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
|
||||
if (sun == NULL) {
|
||||
printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
|
||||
"failed\n");
|
||||
return NULL;
|
||||
}
|
||||
sun->sun_family = AF_UNIX;
|
||||
memcpy(sun->sun_path, name, len);
|
||||
return sun;
|
||||
}
|
||||
|
||||
static int connect_to_switch(struct daemon_data *pri)
|
||||
{
|
||||
struct sockaddr_un *ctl_addr = pri->ctl_addr;
|
||||
struct sockaddr_un *local_addr = pri->local_addr;
|
||||
struct sockaddr_un *sun;
|
||||
struct request_v3 req;
|
||||
int fd, n, err;
|
||||
|
||||
pri->control = socket(AF_UNIX, SOCK_STREAM, 0);
|
||||
if (pri->control < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "daemon_open : control socket failed, "
|
||||
"errno = %d\n", -err);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (connect(pri->control, (struct sockaddr *) ctl_addr,
|
||||
sizeof(*ctl_addr)) < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "daemon_open : control connect failed, "
|
||||
"errno = %d\n", -err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
fd = socket(AF_UNIX, SOCK_DGRAM, 0);
|
||||
if (fd < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "daemon_open : data socket failed, "
|
||||
"errno = %d\n", -err);
|
||||
goto out;
|
||||
}
|
||||
if (bind(fd, (struct sockaddr *) local_addr, sizeof(*local_addr)) < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "daemon_open : data bind failed, "
|
||||
"errno = %d\n", -err);
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
sun = uml_kmalloc(sizeof(struct sockaddr_un), UM_GFP_KERNEL);
|
||||
if (sun == NULL) {
|
||||
printk(UM_KERN_ERR "new_addr: allocation of sockaddr_un "
|
||||
"failed\n");
|
||||
err = -ENOMEM;
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
req.magic = SWITCH_MAGIC;
|
||||
req.version = SWITCH_VERSION;
|
||||
req.type = REQ_NEW_CONTROL;
|
||||
req.sock = *local_addr;
|
||||
n = write(pri->control, &req, sizeof(req));
|
||||
if (n != sizeof(req)) {
|
||||
printk(UM_KERN_ERR "daemon_open : control setup request "
|
||||
"failed, err = %d\n", -errno);
|
||||
err = -ENOTCONN;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
n = read(pri->control, sun, sizeof(*sun));
|
||||
if (n != sizeof(*sun)) {
|
||||
printk(UM_KERN_ERR "daemon_open : read of data socket failed, "
|
||||
"err = %d\n", -errno);
|
||||
err = -ENOTCONN;
|
||||
goto out_free;
|
||||
}
|
||||
|
||||
pri->data_addr = sun;
|
||||
return fd;
|
||||
|
||||
out_free:
|
||||
kfree(sun);
|
||||
out_close:
|
||||
close(fd);
|
||||
out:
|
||||
close(pri->control);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int daemon_user_init(void *data, void *dev)
|
||||
{
|
||||
struct daemon_data *pri = data;
|
||||
struct timeval tv;
|
||||
struct {
|
||||
char zero;
|
||||
int pid;
|
||||
int usecs;
|
||||
} name;
|
||||
|
||||
if (!strcmp(pri->sock_type, "unix"))
|
||||
pri->ctl_addr = new_addr(pri->ctl_sock,
|
||||
strlen(pri->ctl_sock) + 1);
|
||||
name.zero = 0;
|
||||
name.pid = os_getpid();
|
||||
gettimeofday(&tv, NULL);
|
||||
name.usecs = tv.tv_usec;
|
||||
pri->local_addr = new_addr(&name, sizeof(name));
|
||||
pri->dev = dev;
|
||||
pri->fd = connect_to_switch(pri);
|
||||
if (pri->fd < 0) {
|
||||
kfree(pri->local_addr);
|
||||
pri->local_addr = NULL;
|
||||
return pri->fd;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int daemon_open(void *data)
|
||||
{
|
||||
struct daemon_data *pri = data;
|
||||
return pri->fd;
|
||||
}
|
||||
|
||||
static void daemon_remove(void *data)
|
||||
{
|
||||
struct daemon_data *pri = data;
|
||||
|
||||
close(pri->fd);
|
||||
pri->fd = -1;
|
||||
close(pri->control);
|
||||
pri->control = -1;
|
||||
|
||||
kfree(pri->data_addr);
|
||||
pri->data_addr = NULL;
|
||||
kfree(pri->ctl_addr);
|
||||
pri->ctl_addr = NULL;
|
||||
kfree(pri->local_addr);
|
||||
pri->local_addr = NULL;
|
||||
}
|
||||
|
||||
int daemon_user_write(int fd, void *buf, int len, struct daemon_data *pri)
|
||||
{
|
||||
struct sockaddr_un *data_addr = pri->data_addr;
|
||||
|
||||
return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
|
||||
}
|
||||
|
||||
const struct net_user_info daemon_user_info = {
|
||||
.init = daemon_user_init,
|
||||
.open = daemon_open,
|
||||
.close = NULL,
|
||||
.remove = daemon_remove,
|
||||
.add_address = NULL,
|
||||
.delete_address = NULL,
|
||||
.mtu = ETH_MAX_PACKET,
|
||||
.max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
|
||||
};
|
|
@@ -1,889 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
* Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
|
||||
* James Leu (jleu@mindspring.net).
|
||||
* Copyright (C) 2001 by various other people who didn't put their name here.
|
||||
*/
|
||||
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/ethtool.h>
|
||||
#include <linux/inetdevice.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/rtnetlink.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/spinlock.h>
|
||||
#include <init.h>
|
||||
#include <irq_kern.h>
|
||||
#include <irq_user.h>
|
||||
#include "mconsole_kern.h"
|
||||
#include <net_kern.h>
|
||||
#include <net_user.h>
|
||||
|
||||
#define DRIVER_NAME "uml-netdev"
|
||||
|
||||
static DEFINE_SPINLOCK(opened_lock);
|
||||
static LIST_HEAD(opened);
|
||||
|
||||
/*
|
||||
* The drop_skb is used when we can't allocate an skb. The
|
||||
* packet is read into drop_skb in order to get the data off the
|
||||
* connection to the host.
|
||||
* It is reallocated whenever a maximum packet size is seen which is
|
||||
* larger than any seen before. update_drop_skb is called from
|
||||
* eth_configure when a new interface is added.
|
||||
*/
|
||||
static DEFINE_SPINLOCK(drop_lock);
|
||||
static struct sk_buff *drop_skb;
|
||||
static int drop_max;
|
||||
|
||||
static int update_drop_skb(int max)
|
||||
{
|
||||
struct sk_buff *new;
|
||||
unsigned long flags;
|
||||
int err = 0;
|
||||
|
||||
spin_lock_irqsave(&drop_lock, flags);
|
||||
|
||||
if (max <= drop_max)
|
||||
goto out;
|
||||
|
||||
err = -ENOMEM;
|
||||
new = dev_alloc_skb(max);
|
||||
if (new == NULL)
|
||||
goto out;
|
||||
|
||||
skb_put(new, max);
|
||||
|
||||
kfree_skb(drop_skb);
|
||||
drop_skb = new;
|
||||
drop_max = max;
|
||||
err = 0;
|
||||
out:
|
||||
spin_unlock_irqrestore(&drop_lock, flags);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int uml_net_rx(struct net_device *dev)
|
||||
{
|
||||
struct uml_net_private *lp = netdev_priv(dev);
|
||||
int pkt_len;
|
||||
struct sk_buff *skb;
|
||||
|
||||
/* If we can't allocate memory, try again next round. */
|
||||
skb = dev_alloc_skb(lp->max_packet);
|
||||
if (skb == NULL) {
|
||||
drop_skb->dev = dev;
|
||||
/* Read a packet into drop_skb and don't do anything with it. */
|
||||
(*lp->read)(lp->fd, drop_skb, lp);
|
||||
dev->stats.rx_dropped++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
skb->dev = dev;
|
||||
skb_put(skb, lp->max_packet);
|
||||
skb_reset_mac_header(skb);
|
||||
pkt_len = (*lp->read)(lp->fd, skb, lp);
|
||||
|
||||
if (pkt_len > 0) {
|
||||
skb_trim(skb, pkt_len);
|
||||
skb->protocol = (*lp->protocol)(skb);
|
||||
|
||||
dev->stats.rx_bytes += skb->len;
|
||||
dev->stats.rx_packets++;
|
||||
netif_rx(skb);
|
||||
return pkt_len;
|
||||
}
|
||||
|
||||
kfree_skb(skb);
|
||||
return pkt_len;
|
||||
}
|
||||
|
||||
static void uml_dev_close(struct work_struct *work)
|
||||
{
|
||||
struct uml_net_private *lp =
|
||||
container_of(work, struct uml_net_private, work);
|
||||
dev_close(lp->dev);
|
||||
}
|
||||
|
||||
static irqreturn_t uml_net_interrupt(int irq, void *dev_id)
|
||||
{
|
||||
struct net_device *dev = dev_id;
|
||||
struct uml_net_private *lp = netdev_priv(dev);
|
||||
int err;
|
||||
|
||||
if (!netif_running(dev))
|
||||
return IRQ_NONE;
|
||||
|
||||
spin_lock(&lp->lock);
|
||||
while ((err = uml_net_rx(dev)) > 0) ;
|
||||
if (err < 0) {
|
||||
printk(KERN_ERR
|
||||
"Device '%s' read returned %d, shutting it down\n",
|
||||
dev->name, err);
|
||||
/* dev_close can't be called in interrupt context, and takes
|
||||
* again lp->lock.
|
||||
* And dev_close() can be safely called multiple times on the
|
||||
* same device, since it tests for (dev->flags & IFF_UP). So
|
||||
* there's no harm in delaying the device shutdown.
|
||||
* Furthermore, the workqueue will not re-enqueue an already
|
||||
* enqueued work item. */
|
||||
schedule_work(&lp->work);
|
||||
goto out;
|
||||
}
|
||||
out:
|
||||
spin_unlock(&lp->lock);
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static int uml_net_open(struct net_device *dev)
|
||||
{
|
||||
struct uml_net_private *lp = netdev_priv(dev);
|
||||
int err;
|
||||
|
||||
if (lp->fd >= 0) {
|
||||
err = -ENXIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
lp->fd = (*lp->open)(&lp->user);
|
||||
if (lp->fd < 0) {
|
||||
err = lp->fd;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = um_request_irq(dev->irq, lp->fd, IRQ_READ, uml_net_interrupt,
|
||||
IRQF_SHARED, dev->name, dev);
|
||||
if (err < 0) {
|
||||
printk(KERN_ERR "uml_net_open: failed to get irq(%d)\n", err);
|
||||
err = -ENETUNREACH;
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
netif_start_queue(dev);
|
||||
|
||||
/* clear buffer - it can happen that the host side of the interface
|
||||
* is full when we get here. In this case, new data is never queued,
|
||||
* SIGIOs never arrive, and the net never works.
|
||||
*/
|
||||
while ((err = uml_net_rx(dev)) > 0) ;
|
||||
|
||||
spin_lock(&opened_lock);
|
||||
list_add(&lp->list, &opened);
|
||||
spin_unlock(&opened_lock);
|
||||
|
||||
return 0;
|
||||
out_close:
|
||||
if (lp->close != NULL) (*lp->close)(lp->fd, &lp->user);
|
||||
lp->fd = -1;
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int uml_net_close(struct net_device *dev)
|
||||
{
|
||||
struct uml_net_private *lp = netdev_priv(dev);
|
||||
|
||||
netif_stop_queue(dev);
|
||||
|
||||
um_free_irq(dev->irq, dev);
|
||||
if (lp->close != NULL)
|
||||
(*lp->close)(lp->fd, &lp->user);
|
||||
lp->fd = -1;
|
||||
|
||||
spin_lock(&opened_lock);
|
||||
list_del(&lp->list);
|
||||
spin_unlock(&opened_lock);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static netdev_tx_t uml_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||
{
|
||||
struct uml_net_private *lp = netdev_priv(dev);
|
||||
unsigned long flags;
|
||||
int len;
|
||||
|
||||
netif_stop_queue(dev);
|
||||
|
||||
spin_lock_irqsave(&lp->lock, flags);
|
||||
|
||||
len = (*lp->write)(lp->fd, skb, lp);
|
||||
skb_tx_timestamp(skb);
|
||||
|
||||
if (len == skb->len) {
|
||||
dev->stats.tx_packets++;
|
||||
dev->stats.tx_bytes += skb->len;
|
||||
netif_trans_update(dev);
|
||||
netif_start_queue(dev);
|
||||
|
||||
/* this is normally done in the interrupt when tx finishes */
|
||||
netif_wake_queue(dev);
|
||||
}
|
||||
else if (len == 0) {
|
||||
netif_start_queue(dev);
|
||||
dev->stats.tx_dropped++;
|
||||
}
|
||||
else {
|
||||
netif_start_queue(dev);
|
||||
printk(KERN_ERR "uml_net_start_xmit: failed(%d)\n", len);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&lp->lock, flags);
|
||||
|
||||
dev_consume_skb_any(skb);
|
||||
|
||||
return NETDEV_TX_OK;
|
||||
}
|
||||
|
||||
static void uml_net_set_multicast_list(struct net_device *dev)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static void uml_net_tx_timeout(struct net_device *dev, unsigned int txqueue)
|
||||
{
|
||||
netif_trans_update(dev);
|
||||
netif_wake_queue(dev);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_NET_POLL_CONTROLLER
|
||||
static void uml_net_poll_controller(struct net_device *dev)
|
||||
{
|
||||
disable_irq(dev->irq);
|
||||
uml_net_interrupt(dev->irq, dev);
|
||||
enable_irq(dev->irq);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void uml_net_get_drvinfo(struct net_device *dev,
|
||||
struct ethtool_drvinfo *info)
|
||||
{
|
||||
strscpy(info->driver, DRIVER_NAME);
|
||||
}
|
||||
|
||||
static const struct ethtool_ops uml_net_ethtool_ops = {
|
||||
.get_drvinfo = uml_net_get_drvinfo,
|
||||
.get_link = ethtool_op_get_link,
|
||||
.get_ts_info = ethtool_op_get_ts_info,
|
||||
};
|
||||
|
||||
void uml_net_setup_etheraddr(struct net_device *dev, char *str)
|
||||
{
|
||||
u8 addr[ETH_ALEN];
|
||||
char *end;
|
||||
int i;
|
||||
|
||||
if (str == NULL)
|
||||
goto random;
|
||||
|
||||
for (i = 0; i < 6; i++) {
|
||||
addr[i] = simple_strtoul(str, &end, 16);
|
||||
if ((end == str) ||
|
||||
((*end != ':') && (*end != ',') && (*end != '\0'))) {
|
||||
printk(KERN_ERR
|
||||
"setup_etheraddr: failed to parse '%s' "
|
||||
"as an ethernet address\n", str);
|
||||
goto random;
|
||||
}
|
||||
str = end + 1;
|
||||
}
|
||||
if (is_multicast_ether_addr(addr)) {
|
||||
printk(KERN_ERR
|
||||
"Attempt to assign a multicast ethernet address to a "
|
||||
"device disallowed\n");
|
||||
goto random;
|
||||
}
|
||||
if (!is_valid_ether_addr(addr)) {
|
||||
printk(KERN_ERR
|
||||
"Attempt to assign an invalid ethernet address to a "
|
||||
"device disallowed\n");
|
||||
goto random;
|
||||
}
|
||||
if (!is_local_ether_addr(addr)) {
|
||||
printk(KERN_WARNING
|
||||
"Warning: Assigning a globally valid ethernet "
|
||||
"address to a device\n");
|
||||
printk(KERN_WARNING "You should set the 2nd rightmost bit in "
|
||||
"the first byte of the MAC,\n");
|
||||
printk(KERN_WARNING "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n",
|
||||
addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4],
|
||||
addr[5]);
|
||||
}
|
||||
eth_hw_addr_set(dev, addr);
|
||||
return;
|
||||
|
||||
random:
|
||||
printk(KERN_INFO
|
||||
"Choosing a random ethernet address for device %s\n", dev->name);
|
||||
eth_hw_addr_random(dev);
|
||||
}
|
||||
|
||||
static DEFINE_SPINLOCK(devices_lock);
|
||||
static LIST_HEAD(devices);
|
||||
|
||||
static struct platform_driver uml_net_driver = {
|
||||
.driver = {
|
||||
.name = DRIVER_NAME,
|
||||
},
|
||||
};
|
||||
|
||||
static void net_device_release(struct device *dev)
|
||||
{
|
||||
struct uml_net *device = container_of(dev, struct uml_net, pdev.dev);
|
||||
struct net_device *netdev = device->dev;
|
||||
struct uml_net_private *lp = netdev_priv(netdev);
|
||||
|
||||
if (lp->remove != NULL)
|
||||
(*lp->remove)(&lp->user);
|
||||
list_del(&device->list);
|
||||
kfree(device);
|
||||
free_netdev(netdev);
|
||||
}
|
||||
|
||||
static const struct net_device_ops uml_netdev_ops = {
|
||||
.ndo_open = uml_net_open,
|
||||
.ndo_stop = uml_net_close,
|
||||
.ndo_start_xmit = uml_net_start_xmit,
|
||||
.ndo_set_rx_mode = uml_net_set_multicast_list,
|
||||
.ndo_tx_timeout = uml_net_tx_timeout,
|
||||
.ndo_set_mac_address = eth_mac_addr,
|
||||
.ndo_validate_addr = eth_validate_addr,
|
||||
#ifdef CONFIG_NET_POLL_CONTROLLER
|
||||
.ndo_poll_controller = uml_net_poll_controller,
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
* Ensures that platform_driver_register is called only once by
|
||||
* eth_configure. Will be set in an initcall.
|
||||
*/
|
||||
static int driver_registered;
|
||||
|
||||
static void eth_configure(int n, void *init, char *mac,
|
||||
struct transport *transport, gfp_t gfp_mask)
|
||||
{
|
||||
struct uml_net *device;
|
||||
struct net_device *dev;
|
||||
struct uml_net_private *lp;
|
||||
int err, size;
|
||||
|
||||
size = transport->private_size + sizeof(struct uml_net_private);
|
||||
|
||||
device = kzalloc(sizeof(*device), gfp_mask);
|
||||
if (device == NULL) {
|
||||
printk(KERN_ERR "eth_configure failed to allocate struct "
|
||||
"uml_net\n");
|
||||
return;
|
||||
}
|
||||
|
||||
dev = alloc_etherdev(size);
|
||||
if (dev == NULL) {
|
||||
printk(KERN_ERR "eth_configure: failed to allocate struct "
|
||||
"net_device for eth%d\n", n);
|
||||
goto out_free_device;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&device->list);
|
||||
device->index = n;
|
||||
|
||||
/* If this name ends up conflicting with an existing registered
|
||||
* netdevice, that is OK, register_netdev{,ice}() will notice this
|
||||
* and fail.
|
||||
*/
|
||||
snprintf(dev->name, sizeof(dev->name), "eth%d", n);
|
||||
|
||||
uml_net_setup_etheraddr(dev, mac);
|
||||
|
||||
printk(KERN_INFO "Netdevice %d (%pM) : ", n, dev->dev_addr);
|
||||
|
||||
lp = netdev_priv(dev);
|
||||
/* This points to the transport private data. It's still clear, but we
|
||||
* must memset it to 0 *now*. Let's help the drivers. */
|
||||
memset(lp, 0, size);
|
||||
INIT_WORK(&lp->work, uml_dev_close);
|
||||
|
||||
/* sysfs register */
|
||||
if (!driver_registered) {
|
||||
platform_driver_register(¨_net_driver);
|
||||
driver_registered = 1;
|
||||
}
|
||||
device->pdev.id = n;
|
||||
device->pdev.name = DRIVER_NAME;
|
||||
device->pdev.dev.release = net_device_release;
|
||||
dev_set_drvdata(&device->pdev.dev, device);
|
||||
if (platform_device_register(&device->pdev))
|
||||
goto out_free_netdev;
|
||||
SET_NETDEV_DEV(dev,&device->pdev.dev);
|
||||
|
||||
device->dev = dev;
|
||||
|
||||
/*
|
||||
* These just fill in a data structure, so there's no failure
|
||||
* to be worried about.
|
||||
*/
|
||||
(*transport->kern->init)(dev, init);
|
||||
|
||||
*lp = ((struct uml_net_private)
|
||||
{ .list = LIST_HEAD_INIT(lp->list),
|
||||
.dev = dev,
|
||||
.fd = -1,
|
||||
.mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0},
|
||||
.max_packet = transport->user->max_packet,
|
||||
.protocol = transport->kern->protocol,
|
||||
.open = transport->user->open,
|
||||
.close = transport->user->close,
|
||||
.remove = transport->user->remove,
|
||||
.read = transport->kern->read,
|
||||
.write = transport->kern->write,
|
||||
.add_address = transport->user->add_address,
|
||||
.delete_address = transport->user->delete_address });
|
||||
|
||||
spin_lock_init(&lp->lock);
|
||||
memcpy(lp->mac, dev->dev_addr, sizeof(lp->mac));
|
||||
|
||||
if ((transport->user->init != NULL) &&
|
||||
((*transport->user->init)(&lp->user, dev) != 0))
|
||||
goto out_unregister;
|
||||
|
||||
dev->mtu = transport->user->mtu;
|
||||
dev->netdev_ops = ¨_netdev_ops;
|
||||
dev->ethtool_ops = ¨_net_ethtool_ops;
|
||||
dev->watchdog_timeo = (HZ >> 1);
|
||||
dev->irq = UM_ETH_IRQ;
|
||||
|
||||
err = update_drop_skb(lp->max_packet);
|
||||
if (err)
|
||||
goto out_undo_user_init;
|
||||
|
||||
rtnl_lock();
|
||||
err = register_netdevice(dev);
|
||||
rtnl_unlock();
|
||||
if (err)
|
||||
goto out_undo_user_init;
|
||||
|
||||
spin_lock(&devices_lock);
|
||||
list_add(&device->list, &devices);
|
||||
spin_unlock(&devices_lock);
|
||||
|
||||
return;
|
||||
|
||||
out_undo_user_init:
|
||||
if (transport->user->remove != NULL)
|
||||
(*transport->user->remove)(&lp->user);
|
||||
out_unregister:
|
||||
platform_device_unregister(&device->pdev);
|
||||
return; /* platform_device_unregister frees dev and device */
|
||||
out_free_netdev:
|
||||
free_netdev(dev);
|
||||
out_free_device:
|
||||
kfree(device);
|
||||
}
|
||||
|
||||
static struct uml_net *find_device(int n)
|
||||
{
|
||||
struct uml_net *device;
|
||||
struct list_head *ele;
|
||||
|
||||
spin_lock(&devices_lock);
|
||||
list_for_each(ele, &devices) {
|
||||
device = list_entry(ele, struct uml_net, list);
|
||||
if (device->index == n)
|
||||
goto out;
|
||||
}
|
||||
device = NULL;
|
||||
out:
|
||||
spin_unlock(&devices_lock);
|
||||
return device;
|
||||
}
|
||||
|
||||
static int eth_parse(char *str, int *index_out, char **str_out,
|
||||
char **error_out)
|
||||
{
|
||||
char *end;
|
||||
int n, err = -EINVAL;
|
||||
|
||||
n = simple_strtoul(str, &end, 0);
|
||||
if (end == str) {
|
||||
*error_out = "Bad device number";
|
||||
return err;
|
||||
}
|
||||
|
||||
str = end;
|
||||
if (*str != '=') {
|
||||
*error_out = "Expected '=' after device number";
|
||||
return err;
|
||||
}
|
||||
|
||||
str++;
|
||||
if (find_device(n)) {
|
||||
*error_out = "Device already configured";
|
||||
return err;
|
||||
}
|
||||
|
||||
*index_out = n;
|
||||
*str_out = str;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct eth_init {
|
||||
struct list_head list;
|
||||
char *init;
|
||||
int index;
|
||||
};
|
||||
|
||||
static DEFINE_SPINLOCK(transports_lock);
|
||||
static LIST_HEAD(transports);
|
||||
|
||||
/* Filled in during early boot */
|
||||
static LIST_HEAD(eth_cmd_line);
|
||||
|
||||
static int check_transport(struct transport *transport, char *eth, int n,
|
||||
void **init_out, char **mac_out, gfp_t gfp_mask)
|
||||
{
|
||||
int len;
|
||||
|
||||
len = strlen(transport->name);
|
||||
if (strncmp(eth, transport->name, len))
|
||||
return 0;
|
||||
|
||||
eth += len;
|
||||
if (*eth == ',')
|
||||
eth++;
|
||||
else if (*eth != '\0')
|
||||
return 0;
|
||||
|
||||
*init_out = kmalloc(transport->setup_size, gfp_mask);
|
||||
if (*init_out == NULL)
|
||||
return 1;
|
||||
|
||||
if (!transport->setup(eth, mac_out, *init_out)) {
|
||||
kfree(*init_out);
|
||||
*init_out = NULL;
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
|
||||
void register_transport(struct transport *new)
|
||||
{
|
||||
struct list_head *ele, *next;
|
||||
struct eth_init *eth;
|
||||
void *init;
|
||||
char *mac = NULL;
|
||||
int match;
|
||||
|
||||
spin_lock(&transports_lock);
|
||||
BUG_ON(!list_empty(&new->list));
|
||||
list_add(&new->list, &transports);
|
||||
spin_unlock(&transports_lock);
|
||||
|
||||
list_for_each_safe(ele, next, ð_cmd_line) {
|
||||
eth = list_entry(ele, struct eth_init, list);
|
||||
match = check_transport(new, eth->init, eth->index, &init,
|
||||
&mac, GFP_KERNEL);
|
||||
if (!match)
|
||||
continue;
|
||||
else if (init != NULL) {
|
||||
eth_configure(eth->index, init, mac, new, GFP_KERNEL);
|
||||
kfree(init);
|
||||
}
|
||||
list_del(ð->list);
|
||||
}
|
||||
}
|
||||
|
||||
static int eth_setup_common(char *str, int index)
|
||||
{
|
||||
struct list_head *ele;
|
||||
struct transport *transport;
|
||||
void *init;
|
||||
char *mac = NULL;
|
||||
int found = 0;
|
||||
|
||||
spin_lock(&transports_lock);
|
||||
list_for_each(ele, &transports) {
|
||||
transport = list_entry(ele, struct transport, list);
|
||||
if (!check_transport(transport, str, index, &init,
|
||||
&mac, GFP_ATOMIC))
|
||||
continue;
|
||||
if (init != NULL) {
|
||||
eth_configure(index, init, mac, transport, GFP_ATOMIC);
|
||||
kfree(init);
|
||||
}
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
spin_unlock(&transports_lock);
|
||||
return found;
|
||||
}
|
||||
|
||||
static int __init eth_setup(char *str)
|
||||
{
|
||||
struct eth_init *new;
|
||||
char *error;
|
||||
int n, err;
|
||||
|
||||
err = eth_parse(str, &n, &str, &error);
|
||||
if (err) {
|
||||
printk(KERN_ERR "eth_setup - Couldn't parse '%s' : %s\n",
|
||||
str, error);
|
||||
return 1;
|
||||
}
|
||||
|
||||
new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES);
|
||||
|
||||
INIT_LIST_HEAD(&new->list);
|
||||
new->index = n;
|
||||
new->init = str;
|
||||
|
||||
list_add_tail(&new->list, ð_cmd_line);
|
||||
return 1;
|
||||
}
|
||||
|
||||
__setup("eth", eth_setup);
|
||||
__uml_help(eth_setup,
|
||||
"eth[0-9]+=<transport>,<options>\n"
|
||||
" Configure a network device.\n\n"
|
||||
);
|
||||
|
||||
static int net_config(char *str, char **error_out)
|
||||
{
|
||||
int n, err;
|
||||
|
||||
err = eth_parse(str, &n, &str, error_out);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
/* This string is broken up and the pieces used by the underlying
|
||||
* driver. So, it is freed only if eth_setup_common fails.
|
||||
*/
|
||||
str = kstrdup(str, GFP_KERNEL);
|
||||
if (str == NULL) {
|
||||
*error_out = "net_config failed to strdup string";
|
||||
return -ENOMEM;
|
||||
}
|
||||
err = !eth_setup_common(str, n);
|
||||
if (err)
|
||||
kfree(str);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int net_id(char **str, int *start_out, int *end_out)
|
||||
{
|
||||
char *end;
|
||||
int n;
|
||||
|
||||
n = simple_strtoul(*str, &end, 0);
|
||||
if ((*end != '\0') || (end == *str))
|
||||
return -1;
|
||||
|
||||
*start_out = n;
|
||||
*end_out = n;
|
||||
*str = end;
|
||||
return n;
|
||||
}
|
||||
|
||||
static int net_remove(int n, char **error_out)
|
||||
{
|
||||
struct uml_net *device;
|
||||
struct net_device *dev;
|
||||
struct uml_net_private *lp;
|
||||
|
||||
device = find_device(n);
|
||||
if (device == NULL)
|
||||
return -ENODEV;
|
||||
|
||||
dev = device->dev;
|
||||
lp = netdev_priv(dev);
|
||||
if (lp->fd > 0)
|
||||
return -EBUSY;
|
||||
unregister_netdev(dev);
|
||||
platform_device_unregister(&device->pdev);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct mc_device net_mc = {
|
||||
.list = LIST_HEAD_INIT(net_mc.list),
|
||||
.name = "eth",
|
||||
.config = net_config,
|
||||
.get_config = NULL,
|
||||
.id = net_id,
|
||||
.remove = net_remove,
|
||||
};
|
||||
|
||||
#ifdef CONFIG_INET
|
||||
static int uml_inetaddr_event(struct notifier_block *this, unsigned long event,
|
||||
void *ptr)
|
||||
{
|
||||
struct in_ifaddr *ifa = ptr;
|
||||
struct net_device *dev = ifa->ifa_dev->dev;
|
||||
struct uml_net_private *lp;
|
||||
void (*proc)(unsigned char *, unsigned char *, void *);
|
||||
unsigned char addr_buf[4], netmask_buf[4];
|
||||
|
||||
if (dev->netdev_ops->ndo_open != uml_net_open)
|
||||
return NOTIFY_DONE;
|
||||
|
||||
lp = netdev_priv(dev);
|
||||
|
||||
proc = NULL;
|
||||
switch (event) {
|
||||
case NETDEV_UP:
|
||||
proc = lp->add_address;
|
||||
break;
|
||||
case NETDEV_DOWN:
|
||||
proc = lp->delete_address;
|
||||
break;
|
||||
}
|
||||
if (proc != NULL) {
|
||||
memcpy(addr_buf, &ifa->ifa_address, sizeof(addr_buf));
|
||||
memcpy(netmask_buf, &ifa->ifa_mask, sizeof(netmask_buf));
|
||||
(*proc)(addr_buf, netmask_buf, &lp->user);
|
||||
}
|
||||
return NOTIFY_DONE;
|
||||
}
|
||||
|
||||
/* uml_net_init shouldn't be called twice on two CPUs at the same time */
|
||||
static struct notifier_block uml_inetaddr_notifier = {
|
||||
.notifier_call = uml_inetaddr_event,
|
||||
};
|
||||
|
||||
static void inet_register(void)
|
||||
{
|
||||
struct list_head *ele;
|
||||
struct uml_net_private *lp;
|
||||
struct in_device *ip;
|
||||
struct in_ifaddr *in;
|
||||
|
||||
register_inetaddr_notifier(¨_inetaddr_notifier);
|
||||
|
||||
/* Devices may have been opened already, so the uml_inetaddr_notifier
|
||||
* didn't get a chance to run for them. This fakes it so that
|
||||
* addresses which have already been set up get handled properly.
|
||||
*/
|
||||
spin_lock(&opened_lock);
|
||||
list_for_each(ele, &opened) {
|
||||
lp = list_entry(ele, struct uml_net_private, list);
|
||||
ip = lp->dev->ip_ptr;
|
||||
if (ip == NULL)
|
||||
continue;
|
||||
in = ip->ifa_list;
|
||||
while (in != NULL) {
|
||||
uml_inetaddr_event(NULL, NETDEV_UP, in);
|
||||
in = in->ifa_next;
|
||||
}
|
||||
}
|
||||
spin_unlock(&opened_lock);
|
||||
}
|
||||
#else
|
||||
static inline void inet_register(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
static int uml_net_init(void)
|
||||
{
|
||||
mconsole_register_dev(&net_mc);
|
||||
inet_register();
|
||||
return 0;
|
||||
}
|
||||
|
||||
__initcall(uml_net_init);
|
||||
|
||||
static void close_devices(void)
|
||||
{
|
||||
struct list_head *ele;
|
||||
struct uml_net_private *lp;
|
||||
|
||||
spin_lock(&opened_lock);
|
||||
list_for_each(ele, &opened) {
|
||||
lp = list_entry(ele, struct uml_net_private, list);
|
||||
um_free_irq(lp->dev->irq, lp->dev);
|
||||
if ((lp->close != NULL) && (lp->fd >= 0))
|
||||
(*lp->close)(lp->fd, &lp->user);
|
||||
if (lp->remove != NULL)
|
||||
(*lp->remove)(&lp->user);
|
||||
}
|
||||
spin_unlock(&opened_lock);
|
||||
}
|
||||
|
||||
__uml_exitcall(close_devices);
|
||||
|
||||
void iter_addresses(void *d, void (*cb)(unsigned char *, unsigned char *,
|
||||
void *),
|
||||
void *arg)
|
||||
{
|
||||
struct net_device *dev = d;
|
||||
struct in_device *ip = dev->ip_ptr;
|
||||
struct in_ifaddr *in;
|
||||
unsigned char address[4], netmask[4];
|
||||
|
||||
if (ip == NULL) return;
|
||||
in = ip->ifa_list;
|
||||
while (in != NULL) {
|
||||
memcpy(address, &in->ifa_address, sizeof(address));
|
||||
memcpy(netmask, &in->ifa_mask, sizeof(netmask));
|
||||
(*cb)(address, netmask, arg);
|
||||
in = in->ifa_next;
|
||||
}
|
||||
}
|
||||
|
||||
int dev_netmask(void *d, void *m)
|
||||
{
|
||||
struct net_device *dev = d;
|
||||
struct in_device *ip = dev->ip_ptr;
|
||||
struct in_ifaddr *in;
|
||||
__be32 *mask_out = m;
|
||||
|
||||
if (ip == NULL)
|
||||
return 1;
|
||||
|
||||
in = ip->ifa_list;
|
||||
if (in == NULL)
|
||||
return 1;
|
||||
|
||||
*mask_out = in->ifa_mask;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void *get_output_buffer(int *len_out)
|
||||
{
|
||||
void *ret;
|
||||
|
||||
ret = (void *) __get_free_pages(GFP_KERNEL, 0);
|
||||
if (ret) *len_out = PAGE_SIZE;
|
||||
else *len_out = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
void free_output_buffer(void *buffer)
|
||||
{
|
||||
free_pages((unsigned long) buffer, 0);
|
||||
}
|
||||
|
||||
int tap_setup_common(char *str, char *type, char **dev_name, char **mac_out,
|
||||
char **gate_addr)
|
||||
{
|
||||
char *remain;
|
||||
|
||||
remain = split_if_spec(str, dev_name, mac_out, gate_addr, NULL);
|
||||
if (remain != NULL) {
|
||||
printk(KERN_ERR "tap_setup_common - Extra garbage on "
|
||||
"specification : '%s'\n", remain);
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned short eth_protocol(struct sk_buff *skb)
|
||||
{
|
||||
return eth_type_trans(skb, skb->dev);
|
||||
}
|
|
@@ -1,271 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <stdarg.h>
|
||||
#include <errno.h>
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/wait.h>
|
||||
#include <net_user.h>
|
||||
#include <os.h>
|
||||
#include <um_malloc.h>
|
||||
|
||||
int tap_open_common(void *dev, char *gate_addr)
|
||||
{
|
||||
int tap_addr[4];
|
||||
|
||||
if (gate_addr == NULL)
|
||||
return 0;
|
||||
if (sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
|
||||
&tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4) {
|
||||
printk(UM_KERN_ERR "Invalid tap IP address - '%s'\n",
|
||||
gate_addr);
|
||||
return -EINVAL;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void tap_check_ips(char *gate_addr, unsigned char *eth_addr)
|
||||
{
|
||||
int tap_addr[4];
|
||||
|
||||
if ((gate_addr != NULL) &&
|
||||
(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
|
||||
&tap_addr[1], &tap_addr[2], &tap_addr[3]) == 4) &&
|
||||
(eth_addr[0] == tap_addr[0]) &&
|
||||
(eth_addr[1] == tap_addr[1]) &&
|
||||
(eth_addr[2] == tap_addr[2]) &&
|
||||
(eth_addr[3] == tap_addr[3])) {
|
||||
printk(UM_KERN_ERR "The tap IP address and the UML eth IP "
|
||||
"address must be different\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Do reliable error handling as this fails frequently enough. */
|
||||
void read_output(int fd, char *output, int len)
|
||||
{
|
||||
int remain, ret, expected;
|
||||
char c;
|
||||
char *str;
|
||||
|
||||
if (output == NULL) {
|
||||
output = &c;
|
||||
len = sizeof(c);
|
||||
}
|
||||
|
||||
*output = '\0';
|
||||
ret = read(fd, &remain, sizeof(remain));
|
||||
|
||||
if (ret != sizeof(remain)) {
|
||||
if (ret < 0)
|
||||
ret = -errno;
|
||||
expected = sizeof(remain);
|
||||
str = "length";
|
||||
goto err;
|
||||
}
|
||||
|
||||
while (remain != 0) {
|
||||
expected = (remain < len) ? remain : len;
|
||||
ret = read(fd, output, expected);
|
||||
if (ret != expected) {
|
||||
if (ret < 0)
|
||||
ret = -errno;
|
||||
str = "data";
|
||||
goto err;
|
||||
}
|
||||
remain -= ret;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
err:
|
||||
if (ret < 0)
|
||||
printk(UM_KERN_ERR "read_output - read of %s failed, "
|
||||
"errno = %d\n", str, -ret);
|
||||
else
|
||||
printk(UM_KERN_ERR "read_output - read of %s failed, read only "
|
||||
"%d of %d bytes\n", str, ret, expected);
|
||||
}
|
||||
|
||||
int net_read(int fd, void *buf, int len)
|
||||
{
|
||||
int n;
|
||||
|
||||
n = read(fd, buf, len);
|
||||
|
||||
if ((n < 0) && (errno == EAGAIN))
|
||||
return 0;
|
||||
else if (n == 0)
|
||||
return -ENOTCONN;
|
||||
return n;
|
||||
}
|
||||
|
||||
int net_recvfrom(int fd, void *buf, int len)
|
||||
{
|
||||
int n;
|
||||
|
||||
CATCH_EINTR(n = recvfrom(fd, buf, len, 0, NULL, NULL));
|
||||
if (n < 0) {
|
||||
if (errno == EAGAIN)
|
||||
return 0;
|
||||
return -errno;
|
||||
}
|
||||
else if (n == 0)
|
||||
return -ENOTCONN;
|
||||
return n;
|
||||
}
|
||||
|
||||
int net_write(int fd, void *buf, int len)
|
||||
{
|
||||
int n;
|
||||
|
||||
n = write(fd, buf, len);
|
||||
|
||||
if ((n < 0) && (errno == EAGAIN))
|
||||
return 0;
|
||||
else if (n == 0)
|
||||
return -ENOTCONN;
|
||||
return n;
|
||||
}
|
||||
|
||||
int net_send(int fd, void *buf, int len)
|
||||
{
|
||||
int n;
|
||||
|
||||
CATCH_EINTR(n = send(fd, buf, len, 0));
|
||||
if (n < 0) {
|
||||
if (errno == EAGAIN)
|
||||
return 0;
|
||||
return -errno;
|
||||
}
|
||||
else if (n == 0)
|
||||
return -ENOTCONN;
|
||||
return n;
|
||||
}
|
||||
|
||||
int net_sendto(int fd, void *buf, int len, void *to, int sock_len)
|
||||
{
|
||||
int n;
|
||||
|
||||
CATCH_EINTR(n = sendto(fd, buf, len, 0, (struct sockaddr *) to,
|
||||
sock_len));
|
||||
if (n < 0) {
|
||||
if (errno == EAGAIN)
|
||||
return 0;
|
||||
return -errno;
|
||||
}
|
||||
else if (n == 0)
|
||||
return -ENOTCONN;
|
||||
return n;
|
||||
}
|
||||
|
||||
struct change_pre_exec_data {
|
||||
int close_me;
|
||||
int stdout_fd;
|
||||
};
|
||||
|
||||
static void change_pre_exec(void *arg)
|
||||
{
|
||||
struct change_pre_exec_data *data = arg;
|
||||
|
||||
close(data->close_me);
|
||||
dup2(data->stdout_fd, 1);
|
||||
}
|
||||
|
||||
static int change_tramp(char **argv, char *output, int output_len)
|
||||
{
|
||||
int pid, fds[2], err;
|
||||
struct change_pre_exec_data pe_data;
|
||||
|
||||
err = os_pipe(fds, 1, 0);
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "change_tramp - pipe failed, err = %d\n",
|
||||
-err);
|
||||
return err;
|
||||
}
|
||||
pe_data.close_me = fds[0];
|
||||
pe_data.stdout_fd = fds[1];
|
||||
pid = run_helper(change_pre_exec, &pe_data, argv);
|
||||
|
||||
if (pid > 0) /* Avoid hang as we won't get data in failure case. */
|
||||
read_output(fds[0], output, output_len);
|
||||
|
||||
close(fds[0]);
|
||||
close(fds[1]);
|
||||
|
||||
if (pid > 0)
|
||||
helper_wait(pid);
|
||||
return pid;
|
||||
}
|
||||
|
||||
static void change(char *dev, char *what, unsigned char *addr,
|
||||
unsigned char *netmask)
|
||||
{
|
||||
char addr_buf[sizeof("255.255.255.255\0")];
|
||||
char netmask_buf[sizeof("255.255.255.255\0")];
|
||||
char version[sizeof("nnnnn\0")];
|
||||
char *argv[] = { "uml_net", version, what, dev, addr_buf,
|
||||
netmask_buf, NULL };
|
||||
char *output;
|
||||
int output_len, pid;
|
||||
|
||||
sprintf(version, "%d", UML_NET_VERSION);
|
||||
sprintf(addr_buf, "%d.%d.%d.%d", addr[0], addr[1], addr[2], addr[3]);
|
||||
sprintf(netmask_buf, "%d.%d.%d.%d", netmask[0], netmask[1],
|
||||
netmask[2], netmask[3]);
|
||||
|
||||
output_len = UM_KERN_PAGE_SIZE;
|
||||
output = uml_kmalloc(output_len, UM_GFP_KERNEL);
|
||||
if (output == NULL)
|
||||
printk(UM_KERN_ERR "change : failed to allocate output "
|
||||
"buffer\n");
|
||||
|
||||
pid = change_tramp(argv, output, output_len);
|
||||
if (pid < 0) {
|
||||
kfree(output);
|
||||
return;
|
||||
}
|
||||
|
||||
if (output != NULL) {
|
||||
printk("%s", output);
|
||||
kfree(output);
|
||||
}
|
||||
}
|
||||
|
||||
void open_addr(unsigned char *addr, unsigned char *netmask, void *arg)
|
||||
{
|
||||
change(arg, "add", addr, netmask);
|
||||
}
|
||||
|
||||
void close_addr(unsigned char *addr, unsigned char *netmask, void *arg)
|
||||
{
|
||||
change(arg, "del", addr, netmask);
|
||||
}
|
||||
|
||||
char *split_if_spec(char *str, ...)
|
||||
{
|
||||
char **arg, *end, *ret = NULL;
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, str);
|
||||
while ((arg = va_arg(ap, char **)) != NULL) {
|
||||
if (*str == '\0')
|
||||
goto out;
|
||||
end = strchr(str, ',');
|
||||
if (end != str)
|
||||
*arg = str;
|
||||
if (end == NULL)
|
||||
goto out;
|
||||
*end++ = '\0';
|
||||
str = end;
|
||||
}
|
||||
ret = str;
|
||||
out:
|
||||
va_end(ap);
|
||||
return ret;
|
||||
}
|
|
@@ -1,21 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __UM_SLIP_H
#define __UM_SLIP_H

#include "slip_common.h"

struct slip_data {
	void *dev;
	char name[sizeof("slnnnnn\0")];
	char *addr;
	char *gate_addr;
	int slave;
	struct slip_proto slip;
};

extern const struct net_user_info slip_user_info;

extern int slip_user_read(int fd, void *buf, int len, struct slip_data *pri);
extern int slip_user_write(int fd, void *buf, int len, struct slip_data *pri);

#endif
|
|
@@ -1,55 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
#include "slip_common.h"
#include <net_user.h>

int slip_proto_read(int fd, void *buf, int len, struct slip_proto *slip)
{
	int i, n, size, start;

	if(slip->more > 0){
		i = 0;
		while(i < slip->more){
			size = slip_unesc(slip->ibuf[i++], slip->ibuf,
					  &slip->pos, &slip->esc);
			if(size){
				memcpy(buf, slip->ibuf, size);
				memmove(slip->ibuf, &slip->ibuf[i],
					slip->more - i);
				slip->more = slip->more - i;
				return size;
			}
		}
		slip->more = 0;
	}

	n = net_read(fd, &slip->ibuf[slip->pos],
		     sizeof(slip->ibuf) - slip->pos);
	if(n <= 0)
		return n;

	start = slip->pos;
	for(i = 0; i < n; i++){
		size = slip_unesc(slip->ibuf[start + i], slip->ibuf,&slip->pos,
				  &slip->esc);
		if(size){
			memcpy(buf, slip->ibuf, size);
			memmove(slip->ibuf, &slip->ibuf[start+i+1],
				n - (i + 1));
			slip->more = n - (i + 1);
			return size;
		}
	}
	return 0;
}

int slip_proto_write(int fd, void *buf, int len, struct slip_proto *slip)
{
	int actual, n;

	actual = slip_esc(buf, slip->obuf, len);
	n = net_write(fd, slip->obuf, actual);
	if(n < 0)
		return n;
	else return len;
}
|
|
@@ -1,106 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef __UM_SLIP_COMMON_H
|
||||
#define __UM_SLIP_COMMON_H
|
||||
|
||||
#define BUF_SIZE 1500
|
||||
/* two bytes each for a (pathological) max packet of escaped chars + *
|
||||
* terminating END char + initial END char */
|
||||
#define ENC_BUF_SIZE (2 * BUF_SIZE + 2)
|
||||
|
||||
/* SLIP protocol characters. */
|
||||
#define SLIP_END 0300 /* indicates end of frame */
|
||||
#define SLIP_ESC 0333 /* indicates byte stuffing */
|
||||
#define SLIP_ESC_END 0334 /* ESC ESC_END means END 'data' */
|
||||
#define SLIP_ESC_ESC 0335 /* ESC ESC_ESC means ESC 'data' */
|
||||
|
||||
static inline int slip_unesc(unsigned char c, unsigned char *buf, int *pos,
|
||||
int *esc)
|
||||
{
|
||||
int ret;
|
||||
|
||||
switch(c){
|
||||
case SLIP_END:
|
||||
*esc = 0;
|
||||
ret=*pos;
|
||||
*pos=0;
|
||||
return(ret);
|
||||
case SLIP_ESC:
|
||||
*esc = 1;
|
||||
return(0);
|
||||
case SLIP_ESC_ESC:
|
||||
if(*esc){
|
||||
*esc = 0;
|
||||
c = SLIP_ESC;
|
||||
}
|
||||
break;
|
||||
case SLIP_ESC_END:
|
||||
if(*esc){
|
||||
*esc = 0;
|
||||
c = SLIP_END;
|
||||
}
|
||||
break;
|
||||
}
|
||||
buf[(*pos)++] = c;
|
||||
return(0);
|
||||
}
|
||||
|
||||
static inline int slip_esc(unsigned char *s, unsigned char *d, int len)
|
||||
{
|
||||
unsigned char *ptr = d;
|
||||
unsigned char c;
|
||||
|
||||
/*
|
||||
* Send an initial END character to flush out any
|
||||
* data that may have accumulated in the receiver
|
||||
* due to line noise.
|
||||
*/
|
||||
|
||||
*ptr++ = SLIP_END;
|
||||
|
||||
/*
|
||||
* For each byte in the packet, send the appropriate
|
||||
* character sequence, according to the SLIP protocol.
|
||||
*/
|
||||
|
||||
while (len-- > 0) {
|
||||
switch(c = *s++) {
|
||||
case SLIP_END:
|
||||
*ptr++ = SLIP_ESC;
|
||||
*ptr++ = SLIP_ESC_END;
|
||||
break;
|
||||
case SLIP_ESC:
|
||||
*ptr++ = SLIP_ESC;
|
||||
*ptr++ = SLIP_ESC_ESC;
|
||||
break;
|
||||
default:
|
||||
*ptr++ = c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*ptr++ = SLIP_END;
|
||||
return (ptr - d);
|
||||
}
|
||||
|
||||
struct slip_proto {
|
||||
unsigned char ibuf[ENC_BUF_SIZE];
|
||||
unsigned char obuf[ENC_BUF_SIZE];
|
||||
int more; /* more data: do not read fd until ibuf has been drained */
|
||||
int pos;
|
||||
int esc;
|
||||
};
|
||||
|
||||
static inline void slip_proto_init(struct slip_proto * slip)
|
||||
{
|
||||
memset(slip->ibuf, 0, sizeof(slip->ibuf));
|
||||
memset(slip->obuf, 0, sizeof(slip->obuf));
|
||||
slip->more = 0;
|
||||
slip->pos = 0;
|
||||
slip->esc = 0;
|
||||
}
|
||||
|
||||
extern int slip_proto_read(int fd, void *buf, int len,
|
||||
struct slip_proto *slip);
|
||||
extern int slip_proto_write(int fd, void *buf, int len,
|
||||
struct slip_proto *slip);
|
||||
|
||||
#endif
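
The slip_esc()/slip_unesc() helpers above implement classic SLIP (RFC 1055) byte stuffing: an END byte (0300) delimits frames, and END or ESC bytes inside the payload are replaced by two-byte escape sequences. A minimal user-space sketch of the round trip, not part of this commit and assuming only that the slip_common.h shown above is on the include path, would look like this:

/* Hypothetical test harness, not part of the kernel tree: escape a small
 * packet with slip_esc(), then feed the wire bytes one at a time through
 * slip_unesc() and check that the original frame comes back. */
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include "slip_common.h"

int main(void)
{
	unsigned char pkt[] = { 0x45, SLIP_END, 0x00, SLIP_ESC, 0x7f };
	unsigned char enc[ENC_BUF_SIZE], dec[ENC_BUF_SIZE];
	int i, enc_len, pos = 0, esc = 0, size = 0;

	enc_len = slip_esc(pkt, enc, sizeof(pkt));

	/* slip_unesc() returns 0 until it sees the closing END byte, then
	 * returns the length of the decoded frame sitting in dec[]. */
	for (i = 0; i < enc_len && size == 0; i++)
		size = slip_unesc(enc[i], dec, &pos, &esc);

	assert(size == (int)sizeof(pkt));
	assert(memcmp(dec, pkt, sizeof(pkt)) == 0);
	printf("%zu payload bytes became %d bytes on the wire\n",
	       sizeof(pkt), enc_len);
	return 0;
}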
|
|
@@ -1,93 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#include <linux/if_arp.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <net_kern.h>
|
||||
#include "slip.h"
|
||||
|
||||
struct slip_init {
|
||||
char *gate_addr;
|
||||
};
|
||||
|
||||
static void slip_init(struct net_device *dev, void *data)
|
||||
{
|
||||
struct uml_net_private *private;
|
||||
struct slip_data *spri;
|
||||
struct slip_init *init = data;
|
||||
|
||||
private = netdev_priv(dev);
|
||||
spri = (struct slip_data *) private->user;
|
||||
|
||||
memset(spri->name, 0, sizeof(spri->name));
|
||||
spri->addr = NULL;
|
||||
spri->gate_addr = init->gate_addr;
|
||||
spri->slave = -1;
|
||||
spri->dev = dev;
|
||||
|
||||
slip_proto_init(&spri->slip);
|
||||
|
||||
dev->hard_header_len = 0;
|
||||
dev->header_ops = NULL;
|
||||
dev->addr_len = 0;
|
||||
dev->type = ARPHRD_SLIP;
|
||||
dev->tx_queue_len = 256;
|
||||
dev->flags = IFF_NOARP;
|
||||
printk("SLIP backend - SLIP IP = %s\n", spri->gate_addr);
|
||||
}
|
||||
|
||||
static unsigned short slip_protocol(struct sk_buff *skbuff)
|
||||
{
|
||||
return htons(ETH_P_IP);
|
||||
}
|
||||
|
||||
static int slip_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
return slip_user_read(fd, skb_mac_header(skb), skb->dev->mtu,
|
||||
(struct slip_data *) &lp->user);
|
||||
}
|
||||
|
||||
static int slip_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
return slip_user_write(fd, skb->data, skb->len,
|
||||
(struct slip_data *) &lp->user);
|
||||
}
|
||||
|
||||
static const struct net_kern_info slip_kern_info = {
|
||||
.init = slip_init,
|
||||
.protocol = slip_protocol,
|
||||
.read = slip_read,
|
||||
.write = slip_write,
|
||||
};
|
||||
|
||||
static int slip_setup(char *str, char **mac_out, void *data)
|
||||
{
|
||||
struct slip_init *init = data;
|
||||
|
||||
*init = ((struct slip_init) { .gate_addr = NULL });
|
||||
|
||||
if (str[0] != '\0')
|
||||
init->gate_addr = str;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct transport slip_transport = {
|
||||
.list = LIST_HEAD_INIT(slip_transport.list),
|
||||
.name = "slip",
|
||||
.setup = slip_setup,
|
||||
.user = &slip_user_info,
|
||||
.kern = &slip_kern_info,
|
||||
.private_size = sizeof(struct slip_data),
|
||||
.setup_size = sizeof(struct slip_init),
|
||||
};
|
||||
|
||||
static int register_slip(void)
|
||||
{
|
||||
register_transport(&slip_transport);
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(register_slip);
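
slip_kern.c above is a representative consumer of the legacy transport registry that this series removes: a transport bundles a command-line setup parser with kernel-side (net_kern_info) and user-side (net_user_info) operation tables and hands them to register_transport() from a late_initcall. The sketch below only shows the shape of the API being retired; every "null_" name is invented for illustration, and the two operation tables are assumed to be defined elsewhere.

/* Illustrative only: the null_* identifiers are hypothetical.  The pattern
 * mirrors what slip, slirp, umcast and vde each did before this removal. */
#include <linux/init.h>
#include <net_kern.h>

/* kernel-side and user-side operation tables, assumed defined elsewhere */
extern const struct net_kern_info null_kern_info;
extern const struct net_user_info null_user_info;

struct null_init { char *arg; };

/* Parse whatever followed "ethN=null," on the UML command line. */
static int null_setup(char *str, char **mac_out, void *data)
{
	struct null_init *init = data;

	init->arg = str;
	return 1;
}

static struct transport null_transport = {
	.list		= LIST_HEAD_INIT(null_transport.list),
	.name		= "null",
	.setup		= null_setup,
	.user		= &null_user_info,
	.kern		= &null_kern_info,
	.private_size	= 0,
	.setup_size	= sizeof(struct null_init),
};

static int register_null_transport(void)
{
	register_transport(&null_transport);
	return 0;
}

late_initcall(register_null_transport);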
|
|
@@ -1,252 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <string.h>
|
||||
#include <termios.h>
|
||||
#include <sys/wait.h>
|
||||
#include <net_user.h>
|
||||
#include <os.h>
|
||||
#include "slip.h"
|
||||
#include <um_malloc.h>
|
||||
|
||||
static int slip_user_init(void *data, void *dev)
|
||||
{
|
||||
struct slip_data *pri = data;
|
||||
|
||||
pri->dev = dev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_up_tty(int fd)
|
||||
{
|
||||
int i;
|
||||
struct termios tios;
|
||||
|
||||
if (tcgetattr(fd, &tios) < 0) {
|
||||
printk(UM_KERN_ERR "could not get initial terminal "
|
||||
"attributes\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
tios.c_cflag = CS8 | CREAD | HUPCL | CLOCAL;
|
||||
tios.c_iflag = IGNBRK | IGNPAR;
|
||||
tios.c_oflag = 0;
|
||||
tios.c_lflag = 0;
|
||||
for (i = 0; i < NCCS; i++)
|
||||
tios.c_cc[i] = 0;
|
||||
tios.c_cc[VMIN] = 1;
|
||||
tios.c_cc[VTIME] = 0;
|
||||
|
||||
cfsetospeed(&tios, B38400);
|
||||
cfsetispeed(&tios, B38400);
|
||||
|
||||
if (tcsetattr(fd, TCSAFLUSH, &tios) < 0) {
|
||||
printk(UM_KERN_ERR "failed to set terminal attributes\n");
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct slip_pre_exec_data {
|
||||
int stdin_fd;
|
||||
int stdout_fd;
|
||||
int close_me;
|
||||
};
|
||||
|
||||
static void slip_pre_exec(void *arg)
|
||||
{
|
||||
struct slip_pre_exec_data *data = arg;
|
||||
|
||||
if (data->stdin_fd >= 0)
|
||||
dup2(data->stdin_fd, 0);
|
||||
dup2(data->stdout_fd, 1);
|
||||
if (data->close_me >= 0)
|
||||
close(data->close_me);
|
||||
}
|
||||
|
||||
static int slip_tramp(char **argv, int fd)
|
||||
{
|
||||
struct slip_pre_exec_data pe_data;
|
||||
char *output;
|
||||
int pid, fds[2], err, output_len;
|
||||
|
||||
err = os_pipe(fds, 1, 0);
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "slip_tramp : pipe failed, err = %d\n",
|
||||
-err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = 0;
|
||||
pe_data.stdin_fd = fd;
|
||||
pe_data.stdout_fd = fds[1];
|
||||
pe_data.close_me = fds[0];
|
||||
err = run_helper(slip_pre_exec, &pe_data, argv);
|
||||
if (err < 0)
|
||||
goto out_close;
|
||||
pid = err;
|
||||
|
||||
output_len = UM_KERN_PAGE_SIZE;
|
||||
output = uml_kmalloc(output_len, UM_GFP_KERNEL);
|
||||
if (output == NULL) {
|
||||
printk(UM_KERN_ERR "slip_tramp : failed to allocate output "
|
||||
"buffer\n");
|
||||
os_kill_process(pid, 1);
|
||||
err = -ENOMEM;
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
close(fds[1]);
|
||||
read_output(fds[0], output, output_len);
|
||||
printk("%s", output);
|
||||
|
||||
err = helper_wait(pid);
|
||||
close(fds[0]);
|
||||
|
||||
kfree(output);
|
||||
return err;
|
||||
|
||||
out_close:
|
||||
close(fds[0]);
|
||||
close(fds[1]);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int slip_open(void *data)
|
||||
{
|
||||
struct slip_data *pri = data;
|
||||
char version_buf[sizeof("nnnnn\0")];
|
||||
char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
|
||||
char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf,
|
||||
NULL };
|
||||
int sfd, mfd, err;
|
||||
|
||||
err = get_pty();
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "slip-open : Failed to open pty, err = %d\n",
|
||||
-err);
|
||||
goto out;
|
||||
}
|
||||
mfd = err;
|
||||
|
||||
err = open(ptsname(mfd), O_RDWR, 0);
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "Couldn't open tty for slip line, "
|
||||
"err = %d\n", -err);
|
||||
goto out_close;
|
||||
}
|
||||
sfd = err;
|
||||
|
||||
err = set_up_tty(sfd);
|
||||
if (err)
|
||||
goto out_close2;
|
||||
|
||||
pri->slave = sfd;
|
||||
pri->slip.pos = 0;
|
||||
pri->slip.esc = 0;
|
||||
if (pri->gate_addr != NULL) {
|
||||
sprintf(version_buf, "%d", UML_NET_VERSION);
|
||||
strcpy(gate_buf, pri->gate_addr);
|
||||
|
||||
err = slip_tramp(argv, sfd);
|
||||
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "slip_tramp failed - err = %d\n",
|
||||
-err);
|
||||
goto out_close2;
|
||||
}
|
||||
err = os_get_ifname(pri->slave, pri->name);
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "get_ifname failed, err = %d\n",
|
||||
-err);
|
||||
goto out_close2;
|
||||
}
|
||||
iter_addresses(pri->dev, open_addr, pri->name);
|
||||
}
|
||||
else {
|
||||
err = os_set_slip(sfd);
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "Failed to set slip discipline "
|
||||
"encapsulation - err = %d\n", -err);
|
||||
goto out_close2;
|
||||
}
|
||||
}
|
||||
return mfd;
|
||||
out_close2:
|
||||
close(sfd);
|
||||
out_close:
|
||||
close(mfd);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static void slip_close(int fd, void *data)
|
||||
{
|
||||
struct slip_data *pri = data;
|
||||
char version_buf[sizeof("nnnnn\0")];
|
||||
char *argv[] = { "uml_net", version_buf, "slip", "down", pri->name,
|
||||
NULL };
|
||||
int err;
|
||||
|
||||
if (pri->gate_addr != NULL)
|
||||
iter_addresses(pri->dev, close_addr, pri->name);
|
||||
|
||||
sprintf(version_buf, "%d", UML_NET_VERSION);
|
||||
|
||||
err = slip_tramp(argv, pri->slave);
|
||||
|
||||
if (err != 0)
|
||||
printk(UM_KERN_ERR "slip_tramp failed - errno = %d\n", -err);
|
||||
close(fd);
|
||||
close(pri->slave);
|
||||
pri->slave = -1;
|
||||
}
|
||||
|
||||
int slip_user_read(int fd, void *buf, int len, struct slip_data *pri)
|
||||
{
|
||||
return slip_proto_read(fd, buf, len, &pri->slip);
|
||||
}
|
||||
|
||||
int slip_user_write(int fd, void *buf, int len, struct slip_data *pri)
|
||||
{
|
||||
return slip_proto_write(fd, buf, len, &pri->slip);
|
||||
}
|
||||
|
||||
static void slip_add_addr(unsigned char *addr, unsigned char *netmask,
|
||||
void *data)
|
||||
{
|
||||
struct slip_data *pri = data;
|
||||
|
||||
if (pri->slave < 0)
|
||||
return;
|
||||
open_addr(addr, netmask, pri->name);
|
||||
}
|
||||
|
||||
static void slip_del_addr(unsigned char *addr, unsigned char *netmask,
|
||||
void *data)
|
||||
{
|
||||
struct slip_data *pri = data;
|
||||
|
||||
if (pri->slave < 0)
|
||||
return;
|
||||
close_addr(addr, netmask, pri->name);
|
||||
}
|
||||
|
||||
const struct net_user_info slip_user_info = {
|
||||
.init = slip_user_init,
|
||||
.open = slip_open,
|
||||
.close = slip_close,
|
||||
.remove = NULL,
|
||||
.add_address = slip_add_addr,
|
||||
.delete_address = slip_del_addr,
|
||||
.mtu = BUF_SIZE,
|
||||
.max_packet = BUF_SIZE,
|
||||
};
|
|
@@ -1,34 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __UM_SLIRP_H
#define __UM_SLIRP_H

#include "slip_common.h"

#define SLIRP_MAX_ARGS 100
/*
 * XXX this next definition is here because I don't understand why this
 * initializer doesn't work in slirp_kern.c:
 *
 *   argv :  { init->argv[ 0 ... SLIRP_MAX_ARGS-1 ] },
 *
 * or why I can't typecast like this:
 *
 *   argv : (char* [SLIRP_MAX_ARGS])(init->argv),
 */
struct arg_list_dummy_wrapper { char *argv[SLIRP_MAX_ARGS]; };

struct slirp_data {
	void *dev;
	struct arg_list_dummy_wrapper argw;
	int pid;
	int slave;
	struct slip_proto slip;
};

extern const struct net_user_info slirp_user_info;

extern int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri);
extern int slirp_user_write(int fd, void *buf, int len,
			    struct slirp_data *pri);

#endif
|
|
@@ -1,120 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#include <linux/if_arp.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/string.h>
|
||||
#include <net_kern.h>
|
||||
#include <net_user.h>
|
||||
#include "slirp.h"
|
||||
|
||||
struct slirp_init {
|
||||
struct arg_list_dummy_wrapper argw; /* XXX should be simpler... */
|
||||
};
|
||||
|
||||
static void slirp_init(struct net_device *dev, void *data)
|
||||
{
|
||||
struct uml_net_private *private;
|
||||
struct slirp_data *spri;
|
||||
struct slirp_init *init = data;
|
||||
int i;
|
||||
|
||||
private = netdev_priv(dev);
|
||||
spri = (struct slirp_data *) private->user;
|
||||
|
||||
spri->argw = init->argw;
|
||||
spri->pid = -1;
|
||||
spri->slave = -1;
|
||||
spri->dev = dev;
|
||||
|
||||
slip_proto_init(&spri->slip);
|
||||
|
||||
dev->hard_header_len = 0;
|
||||
dev->header_ops = NULL;
|
||||
dev->addr_len = 0;
|
||||
dev->type = ARPHRD_SLIP;
|
||||
dev->tx_queue_len = 256;
|
||||
dev->flags = IFF_NOARP;
|
||||
printk("SLIRP backend - command line:");
|
||||
for (i = 0; spri->argw.argv[i] != NULL; i++)
|
||||
printk(" '%s'",spri->argw.argv[i]);
|
||||
printk("\n");
|
||||
}
|
||||
|
||||
static unsigned short slirp_protocol(struct sk_buff *skbuff)
|
||||
{
|
||||
return htons(ETH_P_IP);
|
||||
}
|
||||
|
||||
static int slirp_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
return slirp_user_read(fd, skb_mac_header(skb), skb->dev->mtu,
|
||||
(struct slirp_data *) &lp->user);
|
||||
}
|
||||
|
||||
static int slirp_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
return slirp_user_write(fd, skb->data, skb->len,
|
||||
(struct slirp_data *) &lp->user);
|
||||
}
|
||||
|
||||
const struct net_kern_info slirp_kern_info = {
|
||||
.init = slirp_init,
|
||||
.protocol = slirp_protocol,
|
||||
.read = slirp_read,
|
||||
.write = slirp_write,
|
||||
};
|
||||
|
||||
static int slirp_setup(char *str, char **mac_out, void *data)
|
||||
{
|
||||
struct slirp_init *init = data;
|
||||
int i=0;
|
||||
|
||||
*init = ((struct slirp_init) { .argw = { { "slirp", NULL } } });
|
||||
|
||||
str = split_if_spec(str, mac_out, NULL);
|
||||
|
||||
if (str == NULL) /* no command line given after MAC addr */
|
||||
return 1;
|
||||
|
||||
do {
|
||||
if (i >= SLIRP_MAX_ARGS - 1) {
|
||||
printk(KERN_WARNING "slirp_setup: truncating slirp "
|
||||
"arguments\n");
|
||||
break;
|
||||
}
|
||||
init->argw.argv[i++] = str;
|
||||
while(*str && *str!=',') {
|
||||
if (*str == '_')
|
||||
*str=' ';
|
||||
str++;
|
||||
}
|
||||
if (*str != ',')
|
||||
break;
|
||||
*str++ = '\0';
|
||||
} while (1);
|
||||
|
||||
init->argw.argv[i] = NULL;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct transport slirp_transport = {
|
||||
.list = LIST_HEAD_INIT(slirp_transport.list),
|
||||
.name = "slirp",
|
||||
.setup = slirp_setup,
|
||||
.user = &slirp_user_info,
|
||||
.kern = &slirp_kern_info,
|
||||
.private_size = sizeof(struct slirp_data),
|
||||
.setup_size = sizeof(struct slirp_init),
|
||||
};
|
||||
|
||||
static int register_slirp(void)
|
||||
{
|
||||
register_transport(&slirp_transport);
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(register_slirp);
|
|
@@ -1,124 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sys/wait.h>
|
||||
#include <net_user.h>
|
||||
#include <os.h>
|
||||
#include "slirp.h"
|
||||
|
||||
static int slirp_user_init(void *data, void *dev)
|
||||
{
|
||||
struct slirp_data *pri = data;
|
||||
|
||||
pri->dev = dev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct slirp_pre_exec_data {
|
||||
int stdin_fd;
|
||||
int stdout_fd;
|
||||
};
|
||||
|
||||
static void slirp_pre_exec(void *arg)
|
||||
{
|
||||
struct slirp_pre_exec_data *data = arg;
|
||||
|
||||
if (data->stdin_fd != -1)
|
||||
dup2(data->stdin_fd, 0);
|
||||
if (data->stdout_fd != -1)
|
||||
dup2(data->stdout_fd, 1);
|
||||
}
|
||||
|
||||
static int slirp_tramp(char **argv, int fd)
|
||||
{
|
||||
struct slirp_pre_exec_data pe_data;
|
||||
int pid;
|
||||
|
||||
pe_data.stdin_fd = fd;
|
||||
pe_data.stdout_fd = fd;
|
||||
pid = run_helper(slirp_pre_exec, &pe_data, argv);
|
||||
|
||||
return pid;
|
||||
}
|
||||
|
||||
static int slirp_open(void *data)
|
||||
{
|
||||
struct slirp_data *pri = data;
|
||||
int fds[2], err;
|
||||
|
||||
err = os_pipe(fds, 1, 1);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = slirp_tramp(pri->argw.argv, fds[1]);
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "slirp_tramp failed - errno = %d\n", -err);
|
||||
goto out;
|
||||
}
|
||||
|
||||
pri->slave = fds[1];
|
||||
pri->slip.pos = 0;
|
||||
pri->slip.esc = 0;
|
||||
pri->pid = err;
|
||||
|
||||
return fds[0];
|
||||
out:
|
||||
close(fds[0]);
|
||||
close(fds[1]);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void slirp_close(int fd, void *data)
|
||||
{
|
||||
struct slirp_data *pri = data;
|
||||
int err;
|
||||
|
||||
close(fd);
|
||||
close(pri->slave);
|
||||
|
||||
pri->slave = -1;
|
||||
|
||||
if (pri->pid<1) {
|
||||
printk(UM_KERN_ERR "slirp_close: no child process to shut "
|
||||
"down\n");
|
||||
return;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (kill(pri->pid, SIGHUP)<0) {
|
||||
printk(UM_KERN_ERR "slirp_close: sending hangup to %d failed "
|
||||
"(%d)\n", pri->pid, errno);
|
||||
}
|
||||
#endif
|
||||
err = helper_wait(pri->pid);
|
||||
if (err < 0)
|
||||
return;
|
||||
|
||||
pri->pid = -1;
|
||||
}
|
||||
|
||||
int slirp_user_read(int fd, void *buf, int len, struct slirp_data *pri)
|
||||
{
|
||||
return slip_proto_read(fd, buf, len, &pri->slip);
|
||||
}
|
||||
|
||||
int slirp_user_write(int fd, void *buf, int len, struct slirp_data *pri)
|
||||
{
|
||||
return slip_proto_write(fd, buf, len, &pri->slip);
|
||||
}
|
||||
|
||||
const struct net_user_info slirp_user_info = {
|
||||
.init = slirp_user_init,
|
||||
.open = slirp_open,
|
||||
.close = slirp_close,
|
||||
.remove = NULL,
|
||||
.add_address = NULL,
|
||||
.delete_address = NULL,
|
||||
.mtu = BUF_SIZE,
|
||||
.max_packet = BUF_SIZE,
|
||||
};
|
|
@@ -1,27 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
 */

#ifndef __DRIVERS_UMCAST_H
#define __DRIVERS_UMCAST_H

#include <net_user.h>

struct umcast_data {
	char *addr;
	unsigned short lport;
	unsigned short rport;
	void *listen_addr;
	void *remote_addr;
	int ttl;
	int unicast;
	void *dev;
};

extern const struct net_user_info umcast_user_info;

extern int umcast_user_write(int fd, void *buf, int len,
			     struct umcast_data *pri);

#endif
|
|
@@ -1,188 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* user-mode-linux networking multicast transport
|
||||
* Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*
|
||||
* based on the existing uml-networking code, which is
|
||||
* Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
|
||||
* James Leu (jleu@mindspring.net).
|
||||
* Copyright (C) 2001 by various other people who didn't put their name here.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include "umcast.h"
|
||||
#include <net_kern.h>
|
||||
|
||||
struct umcast_init {
|
||||
char *addr;
|
||||
int lport;
|
||||
int rport;
|
||||
int ttl;
|
||||
bool unicast;
|
||||
};
|
||||
|
||||
static void umcast_init(struct net_device *dev, void *data)
|
||||
{
|
||||
struct uml_net_private *pri;
|
||||
struct umcast_data *dpri;
|
||||
struct umcast_init *init = data;
|
||||
|
||||
pri = netdev_priv(dev);
|
||||
dpri = (struct umcast_data *) pri->user;
|
||||
dpri->addr = init->addr;
|
||||
dpri->lport = init->lport;
|
||||
dpri->rport = init->rport;
|
||||
dpri->unicast = init->unicast;
|
||||
dpri->ttl = init->ttl;
|
||||
dpri->dev = dev;
|
||||
|
||||
if (dpri->unicast) {
|
||||
printk(KERN_INFO "ucast backend address: %s:%u listen port: "
|
||||
"%u\n", dpri->addr, dpri->rport, dpri->lport);
|
||||
} else {
|
||||
printk(KERN_INFO "mcast backend multicast address: %s:%u, "
|
||||
"TTL:%u\n", dpri->addr, dpri->lport, dpri->ttl);
|
||||
}
|
||||
}
|
||||
|
||||
static int umcast_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
return net_recvfrom(fd, skb_mac_header(skb),
|
||||
skb->dev->mtu + ETH_HEADER_OTHER);
|
||||
}
|
||||
|
||||
static int umcast_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
return umcast_user_write(fd, skb->data, skb->len,
|
||||
(struct umcast_data *) &lp->user);
|
||||
}
|
||||
|
||||
static const struct net_kern_info umcast_kern_info = {
|
||||
.init = umcast_init,
|
||||
.protocol = eth_protocol,
|
||||
.read = umcast_read,
|
||||
.write = umcast_write,
|
||||
};
|
||||
|
||||
static int mcast_setup(char *str, char **mac_out, void *data)
|
||||
{
|
||||
struct umcast_init *init = data;
|
||||
char *port_str = NULL, *ttl_str = NULL, *remain;
|
||||
char *last;
|
||||
|
||||
*init = ((struct umcast_init)
|
||||
{ .addr = "239.192.168.1",
|
||||
.lport = 1102,
|
||||
.ttl = 1 });
|
||||
|
||||
remain = split_if_spec(str, mac_out, &init->addr, &port_str, &ttl_str,
|
||||
NULL);
|
||||
if (remain != NULL) {
|
||||
printk(KERN_ERR "mcast_setup - Extra garbage on "
|
||||
"specification : '%s'\n", remain);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (port_str != NULL) {
|
||||
init->lport = simple_strtoul(port_str, &last, 10);
|
||||
if ((*last != '\0') || (last == port_str)) {
|
||||
printk(KERN_ERR "mcast_setup - Bad port : '%s'\n",
|
||||
port_str);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (ttl_str != NULL) {
|
||||
init->ttl = simple_strtoul(ttl_str, &last, 10);
|
||||
if ((*last != '\0') || (last == ttl_str)) {
|
||||
printk(KERN_ERR "mcast_setup - Bad ttl : '%s'\n",
|
||||
ttl_str);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
init->unicast = false;
|
||||
init->rport = init->lport;
|
||||
|
||||
printk(KERN_INFO "Configured mcast device: %s:%u-%u\n", init->addr,
|
||||
init->lport, init->ttl);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int ucast_setup(char *str, char **mac_out, void *data)
|
||||
{
|
||||
struct umcast_init *init = data;
|
||||
char *lport_str = NULL, *rport_str = NULL, *remain;
|
||||
char *last;
|
||||
|
||||
*init = ((struct umcast_init)
|
||||
{ .addr = "",
|
||||
.lport = 1102,
|
||||
.rport = 1102 });
|
||||
|
||||
remain = split_if_spec(str, mac_out, &init->addr,
|
||||
&lport_str, &rport_str, NULL);
|
||||
if (remain != NULL) {
|
||||
printk(KERN_ERR "ucast_setup - Extra garbage on "
|
||||
"specification : '%s'\n", remain);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (lport_str != NULL) {
|
||||
init->lport = simple_strtoul(lport_str, &last, 10);
|
||||
if ((*last != '\0') || (last == lport_str)) {
|
||||
printk(KERN_ERR "ucast_setup - Bad listen port : "
|
||||
"'%s'\n", lport_str);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (rport_str != NULL) {
|
||||
init->rport = simple_strtoul(rport_str, &last, 10);
|
||||
if ((*last != '\0') || (last == rport_str)) {
|
||||
printk(KERN_ERR "ucast_setup - Bad remote port : "
|
||||
"'%s'\n", rport_str);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
init->unicast = true;
|
||||
|
||||
printk(KERN_INFO "Configured ucast device: :%u -> %s:%u\n",
|
||||
init->lport, init->addr, init->rport);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct transport mcast_transport = {
|
||||
.list = LIST_HEAD_INIT(mcast_transport.list),
|
||||
.name = "mcast",
|
||||
.setup = mcast_setup,
|
||||
.user = &umcast_user_info,
|
||||
.kern = &umcast_kern_info,
|
||||
.private_size = sizeof(struct umcast_data),
|
||||
.setup_size = sizeof(struct umcast_init),
|
||||
};
|
||||
|
||||
static struct transport ucast_transport = {
|
||||
.list = LIST_HEAD_INIT(ucast_transport.list),
|
||||
.name = "ucast",
|
||||
.setup = ucast_setup,
|
||||
.user = &umcast_user_info,
|
||||
.kern = &umcast_kern_info,
|
||||
.private_size = sizeof(struct umcast_data),
|
||||
.setup_size = sizeof(struct umcast_init),
|
||||
};
|
||||
|
||||
static int register_umcast(void)
|
||||
{
|
||||
register_transport(&mcast_transport);
|
||||
register_transport(&ucast_transport);
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(register_umcast);
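
Read together with the split_if_spec() calls in mcast_setup() and ucast_setup() above, the command-line syntax these two transports accepted can be reconstructed as follows; the field order comes from the parsers, the concrete addresses are only examples:

eth0=mcast,aa:bb:cc:dd:ee:f0,239.192.168.1,1102,1
eth0=ucast,aa:bb:cc:dd:ee:f0,192.168.0.2,1102,1103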
|
|
@@ -1,184 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* user-mode-linux networking multicast transport
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
* Copyright (C) 2001 by Harald Welte <laforge@gnumonks.org>
|
||||
*
|
||||
* based on the existing uml-networking code, which is
|
||||
* Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
|
||||
* James Leu (jleu@mindspring.net).
|
||||
* Copyright (C) 2001 by various other people who didn't put their name here.
|
||||
*
|
||||
*
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <netinet/in.h>
|
||||
#include "umcast.h"
|
||||
#include <net_user.h>
|
||||
#include <um_malloc.h>
|
||||
|
||||
static struct sockaddr_in *new_addr(char *addr, unsigned short port)
|
||||
{
|
||||
struct sockaddr_in *sin;
|
||||
|
||||
sin = uml_kmalloc(sizeof(struct sockaddr_in), UM_GFP_KERNEL);
|
||||
if (sin == NULL) {
|
||||
printk(UM_KERN_ERR "new_addr: allocation of sockaddr_in "
|
||||
"failed\n");
|
||||
return NULL;
|
||||
}
|
||||
sin->sin_family = AF_INET;
|
||||
if (addr)
|
||||
sin->sin_addr.s_addr = in_aton(addr);
|
||||
else
|
||||
sin->sin_addr.s_addr = INADDR_ANY;
|
||||
sin->sin_port = htons(port);
|
||||
return sin;
|
||||
}
|
||||
|
||||
static int umcast_user_init(void *data, void *dev)
|
||||
{
|
||||
struct umcast_data *pri = data;
|
||||
|
||||
pri->remote_addr = new_addr(pri->addr, pri->rport);
|
||||
if (pri->unicast)
|
||||
pri->listen_addr = new_addr(NULL, pri->lport);
|
||||
else
|
||||
pri->listen_addr = pri->remote_addr;
|
||||
pri->dev = dev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void umcast_remove(void *data)
|
||||
{
|
||||
struct umcast_data *pri = data;
|
||||
|
||||
kfree(pri->listen_addr);
|
||||
if (pri->unicast)
|
||||
kfree(pri->remote_addr);
|
||||
pri->listen_addr = pri->remote_addr = NULL;
|
||||
}
|
||||
|
||||
static int umcast_open(void *data)
|
||||
{
|
||||
struct umcast_data *pri = data;
|
||||
struct sockaddr_in *lsin = pri->listen_addr;
|
||||
struct sockaddr_in *rsin = pri->remote_addr;
|
||||
struct ip_mreq mreq;
|
||||
int fd, yes = 1, err = -EINVAL;
|
||||
|
||||
|
||||
if ((!pri->unicast && lsin->sin_addr.s_addr == 0) ||
|
||||
(rsin->sin_addr.s_addr == 0) ||
|
||||
(lsin->sin_port == 0) || (rsin->sin_port == 0))
|
||||
goto out;
|
||||
|
||||
fd = socket(AF_INET, SOCK_DGRAM, 0);
|
||||
|
||||
if (fd < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "umcast_open : data socket failed, "
|
||||
"errno = %d\n", errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "umcast_open: SO_REUSEADDR failed, "
|
||||
"errno = %d\n", errno);
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
if (!pri->unicast) {
|
||||
/* set ttl according to config */
|
||||
if (setsockopt(fd, SOL_IP, IP_MULTICAST_TTL, &pri->ttl,
|
||||
sizeof(pri->ttl)) < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_TTL "
|
||||
"failed, error = %d\n", errno);
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
/* set LOOP, so data does get fed back to local sockets */
|
||||
if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP,
|
||||
&yes, sizeof(yes)) < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "umcast_open: IP_MULTICAST_LOOP "
|
||||
"failed, error = %d\n", errno);
|
||||
goto out_close;
|
||||
}
|
||||
}
|
||||
|
||||
/* bind socket to the address */
|
||||
if (bind(fd, (struct sockaddr *) lsin, sizeof(*lsin)) < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "umcast_open : data bind failed, "
|
||||
"errno = %d\n", errno);
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
if (!pri->unicast) {
|
||||
/* subscribe to the multicast group */
|
||||
mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr;
|
||||
mreq.imr_interface.s_addr = 0;
|
||||
if (setsockopt(fd, SOL_IP, IP_ADD_MEMBERSHIP,
|
||||
&mreq, sizeof(mreq)) < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "umcast_open: IP_ADD_MEMBERSHIP "
|
||||
"failed, error = %d\n", errno);
|
||||
printk(UM_KERN_ERR "There appears not to be a "
|
||||
"multicast-capable network interface on the "
|
||||
"host.\n");
|
||||
printk(UM_KERN_ERR "eth0 should be configured in order "
|
||||
"to use the multicast transport.\n");
|
||||
goto out_close;
|
||||
}
|
||||
}
|
||||
|
||||
return fd;
|
||||
|
||||
out_close:
|
||||
close(fd);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static void umcast_close(int fd, void *data)
|
||||
{
|
||||
struct umcast_data *pri = data;
|
||||
|
||||
if (!pri->unicast) {
|
||||
struct ip_mreq mreq;
|
||||
struct sockaddr_in *lsin = pri->listen_addr;
|
||||
|
||||
mreq.imr_multiaddr.s_addr = lsin->sin_addr.s_addr;
|
||||
mreq.imr_interface.s_addr = 0;
|
||||
if (setsockopt(fd, SOL_IP, IP_DROP_MEMBERSHIP,
|
||||
&mreq, sizeof(mreq)) < 0) {
|
||||
printk(UM_KERN_ERR "umcast_close: IP_DROP_MEMBERSHIP "
|
||||
"failed, error = %d\n", errno);
|
||||
}
|
||||
}
|
||||
|
||||
close(fd);
|
||||
}
|
||||
|
||||
int umcast_user_write(int fd, void *buf, int len, struct umcast_data *pri)
|
||||
{
|
||||
struct sockaddr_in *data_addr = pri->remote_addr;
|
||||
|
||||
return net_sendto(fd, buf, len, data_addr, sizeof(*data_addr));
|
||||
}
|
||||
|
||||
const struct net_user_info umcast_user_info = {
|
||||
.init = umcast_user_init,
|
||||
.open = umcast_open,
|
||||
.close = umcast_close,
|
||||
.remove = umcast_remove,
|
||||
.add_address = NULL,
|
||||
.delete_address = NULL,
|
||||
.mtu = ETH_MAX_PACKET,
|
||||
.max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
|
||||
};
|
|
@@ -1,32 +0,0 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
 */

#ifndef __UM_VDE_H__
#define __UM_VDE_H__

struct vde_data {
	char *vde_switch;
	char *descr;
	void *args;
	void *conn;
	void *dev;
};

struct vde_init {
	char *vde_switch;
	char *descr;
	int port;
	char *group;
	int mode;
};

extern const struct net_user_info vde_user_info;

extern void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init);

extern int vde_user_read(void *conn, void *buf, int len);
extern int vde_user_write(void *conn, void *buf, int len);

#endif
|
|
@@ -1,129 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
|
||||
*
|
||||
* Transport usage:
|
||||
* ethN=vde,<vde_switch>,<mac addr>,<port>,<group>,<mode>,<description>
|
||||
*
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include <net_kern.h>
|
||||
#include <net_user.h>
|
||||
#include "vde.h"
|
||||
|
||||
static void vde_init(struct net_device *dev, void *data)
|
||||
{
|
||||
struct vde_init *init = data;
|
||||
struct uml_net_private *pri;
|
||||
struct vde_data *vpri;
|
||||
|
||||
pri = netdev_priv(dev);
|
||||
vpri = (struct vde_data *) pri->user;
|
||||
|
||||
vpri->vde_switch = init->vde_switch;
|
||||
vpri->descr = init->descr ? init->descr : "UML vde_transport";
|
||||
vpri->args = NULL;
|
||||
vpri->conn = NULL;
|
||||
vpri->dev = dev;
|
||||
|
||||
printk("vde backend - %s, ", vpri->vde_switch ?
|
||||
vpri->vde_switch : "(default socket)");
|
||||
|
||||
vde_init_libstuff(vpri, init);
|
||||
|
||||
printk("\n");
|
||||
}
|
||||
|
||||
static int vde_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
struct vde_data *pri = (struct vde_data *) &lp->user;
|
||||
|
||||
if (pri->conn != NULL)
|
||||
return vde_user_read(pri->conn, skb_mac_header(skb),
|
||||
skb->dev->mtu + ETH_HEADER_OTHER);
|
||||
|
||||
printk(KERN_ERR "vde_read - we have no VDECONN to read from");
|
||||
return -EBADF;
|
||||
}
|
||||
|
||||
static int vde_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
struct vde_data *pri = (struct vde_data *) &lp->user;
|
||||
|
||||
if (pri->conn != NULL)
|
||||
return vde_user_write((void *)pri->conn, skb->data,
|
||||
skb->len);
|
||||
|
||||
printk(KERN_ERR "vde_write - we have no VDECONN to write to");
|
||||
return -EBADF;
|
||||
}
|
||||
|
||||
static const struct net_kern_info vde_kern_info = {
|
||||
.init = vde_init,
|
||||
.protocol = eth_protocol,
|
||||
.read = vde_read,
|
||||
.write = vde_write,
|
||||
};
|
||||
|
||||
static int vde_setup(char *str, char **mac_out, void *data)
|
||||
{
|
||||
struct vde_init *init = data;
|
||||
char *remain, *port_str = NULL, *mode_str = NULL, *last;
|
||||
|
||||
*init = ((struct vde_init)
|
||||
{ .vde_switch = NULL,
|
||||
.descr = NULL,
|
||||
.port = 0,
|
||||
.group = NULL,
|
||||
.mode = 0 });
|
||||
|
||||
remain = split_if_spec(str, &init->vde_switch, mac_out, &port_str,
|
||||
&init->group, &mode_str, &init->descr, NULL);
|
||||
|
||||
if (remain != NULL)
|
||||
printk(KERN_WARNING "vde_setup - Ignoring extra data :"
|
||||
"'%s'\n", remain);
|
||||
|
||||
if (port_str != NULL) {
|
||||
init->port = simple_strtoul(port_str, &last, 10);
|
||||
if ((*last != '\0') || (last == port_str)) {
|
||||
printk(KERN_ERR "vde_setup - Bad port : '%s'\n",
|
||||
port_str);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (mode_str != NULL) {
|
||||
init->mode = simple_strtoul(mode_str, &last, 8);
|
||||
if ((*last != '\0') || (last == mode_str)) {
|
||||
printk(KERN_ERR "vde_setup - Bad mode : '%s'\n",
|
||||
mode_str);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
printk(KERN_INFO "Configured vde device: %s\n", init->vde_switch ?
|
||||
init->vde_switch : "(default socket)");
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct transport vde_transport = {
|
||||
.list = LIST_HEAD_INIT(vde_transport.list),
|
||||
.name = "vde",
|
||||
.setup = vde_setup,
|
||||
.user = &vde_user_info,
|
||||
.kern = &vde_kern_info,
|
||||
.private_size = sizeof(struct vde_data),
|
||||
.setup_size = sizeof(struct vde_init),
|
||||
};
|
||||
|
||||
static int register_vde(void)
|
||||
{
|
||||
register_transport(&vde_transport);
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(register_vde);
|
|
@@ -1,125 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2007 Luca Bigliardi (shammash@artha.org).
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <errno.h>
|
||||
#include <libvdeplug.h>
|
||||
#include <net_user.h>
|
||||
#include <um_malloc.h>
|
||||
#include "vde.h"
|
||||
|
||||
static int vde_user_init(void *data, void *dev)
|
||||
{
|
||||
struct vde_data *pri = data;
|
||||
VDECONN *conn = NULL;
|
||||
int err = -EINVAL;
|
||||
|
||||
pri->dev = dev;
|
||||
|
||||
conn = vde_open(pri->vde_switch, pri->descr, pri->args);
|
||||
|
||||
if (conn == NULL) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "vde_user_init: vde_open failed, "
|
||||
"errno = %d\n", errno);
|
||||
return err;
|
||||
}
|
||||
|
||||
printk(UM_KERN_INFO "vde backend - connection opened\n");
|
||||
|
||||
pri->conn = conn;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vde_user_open(void *data)
|
||||
{
|
||||
struct vde_data *pri = data;
|
||||
|
||||
if (pri->conn != NULL)
|
||||
return vde_datafd(pri->conn);
|
||||
|
||||
printk(UM_KERN_WARNING "vde_open - we have no VDECONN to open");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static void vde_remove(void *data)
|
||||
{
|
||||
struct vde_data *pri = data;
|
||||
|
||||
if (pri->conn != NULL) {
|
||||
printk(UM_KERN_INFO "vde backend - closing connection\n");
|
||||
vde_close(pri->conn);
|
||||
pri->conn = NULL;
|
||||
kfree(pri->args);
|
||||
pri->args = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
printk(UM_KERN_WARNING "vde_remove - we have no VDECONN to remove");
|
||||
}
|
||||
|
||||
const struct net_user_info vde_user_info = {
|
||||
.init = vde_user_init,
|
||||
.open = vde_user_open,
|
||||
.close = NULL,
|
||||
.remove = vde_remove,
|
||||
.add_address = NULL,
|
||||
.delete_address = NULL,
|
||||
.mtu = ETH_MAX_PACKET,
|
||||
.max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
|
||||
};
|
||||
|
||||
void vde_init_libstuff(struct vde_data *vpri, struct vde_init *init)
|
||||
{
|
||||
struct vde_open_args *args;
|
||||
|
||||
vpri->args = uml_kmalloc(sizeof(struct vde_open_args), UM_GFP_KERNEL);
|
||||
if (vpri->args == NULL) {
|
||||
printk(UM_KERN_ERR "vde_init_libstuff - vde_open_args "
|
||||
"allocation failed");
|
||||
return;
|
||||
}
|
||||
|
||||
args = vpri->args;
|
||||
|
||||
args->port = init->port;
|
||||
args->group = init->group;
|
||||
args->mode = init->mode ? init->mode : 0700;
|
||||
|
||||
args->port ? printk("port %d", args->port) :
|
||||
printk("undefined port");
|
||||
}
|
||||
|
||||
int vde_user_read(void *conn, void *buf, int len)
|
||||
{
|
||||
VDECONN *vconn = conn;
|
||||
int rv;
|
||||
|
||||
if (vconn == NULL)
|
||||
return 0;
|
||||
|
||||
rv = vde_recv(vconn, buf, len, 0);
|
||||
if (rv < 0) {
|
||||
if (errno == EAGAIN)
|
||||
return 0;
|
||||
return -errno;
|
||||
}
|
||||
else if (rv == 0)
|
||||
return -ENOTCONN;
|
||||
|
||||
return rv;
|
||||
}
|
||||
|
||||
int vde_user_write(void *conn, void *buf, int len)
|
||||
{
|
||||
VDECONN *vconn = conn;
|
||||
|
||||
if (vconn == NULL)
|
||||
return 0;
|
||||
|
||||
return vde_send(vconn, buf, len, 0);
|
||||
}
|
||||
|
|
@@ -8,6 +8,8 @@
|
|||
* Copyright (C) 2001 by various other people who didn't put their name here.
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "uml-vector: " fmt
|
||||
|
||||
#include <linux/memblock.h>
|
||||
#include <linux/etherdevice.h>
|
||||
#include <linux/ethtool.h>
|
||||
|
@@ -27,7 +29,6 @@
|
|||
#include <init.h>
|
||||
#include <irq_kern.h>
|
||||
#include <irq_user.h>
|
||||
#include <net_kern.h>
|
||||
#include <os.h>
|
||||
#include "mconsole_kern.h"
|
||||
#include "vector_user.h"
|
||||
|
@@ -1539,7 +1540,41 @@ static void vector_timer_expire(struct timer_list *t)
|
|||
napi_schedule(&vp->napi);
|
||||
}
|
||||
|
||||
static void vector_setup_etheraddr(struct net_device *dev, char *str)
|
||||
{
|
||||
u8 addr[ETH_ALEN];
|
||||
|
||||
if (str == NULL)
|
||||
goto random;
|
||||
|
||||
if (!mac_pton(str, addr)) {
|
||||
netdev_err(dev,
|
||||
"Failed to parse '%s' as an ethernet address\n", str);
|
||||
goto random;
|
||||
}
|
||||
if (is_multicast_ether_addr(addr)) {
|
||||
netdev_err(dev,
|
||||
"Attempt to assign a multicast ethernet address to a device disallowed\n");
|
||||
goto random;
|
||||
}
|
||||
if (!is_valid_ether_addr(addr)) {
|
||||
netdev_err(dev,
|
||||
"Attempt to assign an invalid ethernet address to a device disallowed\n");
|
||||
goto random;
|
||||
}
|
||||
if (!is_local_ether_addr(addr)) {
|
||||
netdev_warn(dev, "Warning: Assigning a globally valid ethernet address to a device\n");
|
||||
netdev_warn(dev, "You should set the 2nd rightmost bit in the first byte of the MAC,\n");
|
||||
netdev_warn(dev, "i.e. %02x:%02x:%02x:%02x:%02x:%02x\n",
|
||||
addr[0] | 0x02, addr[1], addr[2], addr[3], addr[4], addr[5]);
|
||||
}
|
||||
eth_hw_addr_set(dev, addr);
|
||||
return;
|
||||
|
||||
random:
|
||||
netdev_info(dev, "Choosing a random ethernet address\n");
|
||||
eth_hw_addr_random(dev);
|
||||
}
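
The three address checks in vector_setup_etheraddr() above all look at the first octet of the MAC: bit 0 set means a multicast address, which is never valid for an interface, and bit 1 set marks the address as locally administered, which is what a virtual NIC should use. A tiny user-space sketch of the same classification follows; mac_pton() is kernel-only, so plain sscanf() stands in for it here, and the sample address is illustrative:

#include <stdio.h>

int main(void)
{
	unsigned int b[6];
	const char *str = "16:00:5e:00:53:01";	/* example address */

	if (sscanf(str, "%x:%x:%x:%x:%x:%x",
		   &b[0], &b[1], &b[2], &b[3], &b[4], &b[5]) != 6)
		return 1;

	printf("multicast:            %s\n", (b[0] & 0x01) ? "yes" : "no");
	printf("locally administered: %s\n", (b[0] & 0x02) ? "yes" : "no");
	/* 0x16 has bit 0 clear and bit 1 set: unicast and locally
	 * administered, so vector_setup_etheraddr() would accept it
	 * without the "globally valid" warning. */
	return 0;
}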
|
||||
|
||||
static void vector_eth_configure(
|
||||
int n,
|
||||
|
@@ -1553,14 +1588,12 @@ static void vector_eth_configure(
|
|||
|
||||
device = kzalloc(sizeof(*device), GFP_KERNEL);
|
||||
if (device == NULL) {
|
||||
printk(KERN_ERR "eth_configure failed to allocate struct "
|
||||
"vector_device\n");
|
||||
pr_err("Failed to allocate struct vector_device for vec%d\n", n);
|
||||
return;
|
||||
}
|
||||
dev = alloc_etherdev(sizeof(struct vector_private));
|
||||
if (dev == NULL) {
|
||||
printk(KERN_ERR "eth_configure: failed to allocate struct "
|
||||
"net_device for vec%d\n", n);
|
||||
pr_err("Failed to allocate struct net_device for vec%d\n", n);
|
||||
goto out_free_device;
|
||||
}
|
||||
|
||||
|
@@ -1574,7 +1607,7 @@
|
|||
* and fail.
|
||||
*/
|
||||
snprintf(dev->name, sizeof(dev->name), "vec%d", n);
|
||||
uml_net_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
|
||||
vector_setup_etheraddr(dev, uml_vector_fetch_arg(def, "mac"));
|
||||
vp = netdev_priv(dev);
|
||||
|
||||
/* sysfs register */
|
||||
|
@@ -1690,8 +1723,7 @@ static int __init vector_setup(char *str)
|
|||
|
||||
err = vector_parse(str, &n, &str, &error);
|
||||
if (err) {
|
||||
printk(KERN_ERR "vector_setup - Couldn't parse '%s' : %s\n",
|
||||
str, error);
|
||||
pr_err("Couldn't parse '%s': %s\n", str, error);
|
||||
return 1;
|
||||
}
|
||||
new = memblock_alloc_or_panic(sizeof(*new), SMP_CACHE_BYTES);
|
||||
|
|
arch/um/drivers/vfio_kern.c (new file, 642 lines)
|
@@ -0,0 +1,642 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2025 Ant Group
|
||||
* Author: Tiwei Bie <tiwei.btw@antgroup.com>
|
||||
*/
|
||||
|
||||
#define pr_fmt(fmt) "vfio-uml: " fmt
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/logic_iomem.h>
|
||||
#include <linux/mutex.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/unaligned.h>
|
||||
#include <irq_kern.h>
|
||||
#include <init.h>
|
||||
#include <os.h>
|
||||
|
||||
#include "virt-pci.h"
|
||||
#include "vfio_user.h"
|
||||
|
||||
#define to_vdev(_pdev) container_of(_pdev, struct uml_vfio_device, pdev)
|
||||
|
||||
struct uml_vfio_intr_ctx {
|
||||
struct uml_vfio_device *dev;
|
||||
int irq;
|
||||
};
|
||||
|
||||
struct uml_vfio_device {
|
||||
const char *name;
|
||||
int group;
|
||||
|
||||
struct um_pci_device pdev;
|
||||
struct uml_vfio_user_device udev;
|
||||
struct uml_vfio_intr_ctx *intr_ctx;
|
||||
|
||||
int msix_cap;
|
||||
int msix_bar;
|
||||
int msix_offset;
|
||||
int msix_size;
|
||||
u32 *msix_data;
|
||||
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
struct uml_vfio_group {
|
||||
int id;
|
||||
int fd;
|
||||
int users;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
static struct {
|
||||
int fd;
|
||||
int users;
|
||||
} uml_vfio_container = { .fd = -1 };
|
||||
static DEFINE_MUTEX(uml_vfio_container_mtx);
|
||||
|
||||
static LIST_HEAD(uml_vfio_groups);
|
||||
static DEFINE_MUTEX(uml_vfio_groups_mtx);
|
||||
|
||||
static LIST_HEAD(uml_vfio_devices);
|
||||
|
||||
static int uml_vfio_set_container(int group_fd)
|
||||
{
|
||||
int err;
|
||||
|
||||
guard(mutex)(¨_vfio_container_mtx);
|
||||
|
||||
err = uml_vfio_user_set_container(uml_vfio_container.fd, group_fd);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
uml_vfio_container.users++;
|
||||
if (uml_vfio_container.users > 1)
|
||||
return 0;
|
||||
|
||||
err = uml_vfio_user_setup_iommu(uml_vfio_container.fd);
|
||||
if (err) {
|
||||
uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd);
|
||||
uml_vfio_container.users--;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static void uml_vfio_unset_container(int group_fd)
|
||||
{
|
||||
guard(mutex)(¨_vfio_container_mtx);
|
||||
|
||||
uml_vfio_user_unset_container(uml_vfio_container.fd, group_fd);
|
||||
uml_vfio_container.users--;
|
||||
}
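
uml_vfio_set_container() and uml_vfio_unset_container() above delegate to uml_vfio_user_* helpers that live in vfio_user.c, which is not shown in this excerpt; they presumably wrap the standard VFIO container/group handshake. For orientation, the canonical user-space sequence using the <linux/vfio.h> ioctls looks roughly like the sketch below; the group number and the device name are placeholders, not values taken from this driver.

/* Stand-alone illustration of the VFIO handshake, assumptions as above. */
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

int main(void)
{
	int container = open("/dev/vfio/vfio", O_RDWR);
	int group = open("/dev/vfio/26", O_RDWR);	/* example group */
	int device;

	if (container < 0 || group < 0)
		return 1;
	if (ioctl(container, VFIO_GET_API_VERSION) != VFIO_API_VERSION)
		return 1;

	/* Attach the group to the container, then pick an IOMMU model. */
	if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) ||
	    ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU))
		return 1;

	/* Finally obtain a device fd by its PCI address. */
	device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, "0000:00:1f.6");
	printf("device fd = %d\n", device);
	return device < 0;
}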
|
||||
|
||||
static int uml_vfio_open_group(int group_id)
|
||||
{
|
||||
struct uml_vfio_group *group;
|
||||
int err;
|
||||
|
||||
guard(mutex)(¨_vfio_groups_mtx);
|
||||
|
||||
list_for_each_entry(group, ¨_vfio_groups, list) {
|
||||
if (group->id == group_id) {
|
||||
group->users++;
|
||||
return group->fd;
|
||||
}
|
||||
}
|
||||
|
||||
group = kzalloc(sizeof(*group), GFP_KERNEL);
|
||||
if (!group)
|
||||
return -ENOMEM;
|
||||
|
||||
group->fd = uml_vfio_user_open_group(group_id);
|
||||
if (group->fd < 0) {
|
||||
err = group->fd;
|
||||
goto free_group;
|
||||
}
|
||||
|
||||
err = uml_vfio_set_container(group->fd);
|
||||
if (err)
|
||||
goto close_group;
|
||||
|
||||
group->id = group_id;
|
||||
group->users = 1;
|
||||
|
||||
list_add(&group->list, ¨_vfio_groups);
|
||||
|
||||
return group->fd;
|
||||
|
||||
close_group:
|
||||
os_close_file(group->fd);
|
||||
free_group:
|
||||
kfree(group);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int uml_vfio_release_group(int group_fd)
|
||||
{
|
||||
struct uml_vfio_group *group;
|
||||
|
||||
guard(mutex)(¨_vfio_groups_mtx);
|
||||
|
||||
list_for_each_entry(group, ¨_vfio_groups, list) {
|
||||
if (group->fd == group_fd) {
|
||||
group->users--;
|
||||
if (group->users == 0) {
|
||||
uml_vfio_unset_container(group_fd);
|
||||
os_close_file(group_fd);
|
||||
list_del(&group->list);
|
||||
kfree(group);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
static irqreturn_t uml_vfio_interrupt(int unused, void *opaque)
|
||||
{
|
||||
struct uml_vfio_intr_ctx *ctx = opaque;
|
||||
struct uml_vfio_device *dev = ctx->dev;
|
||||
int index = ctx - dev->intr_ctx;
|
||||
int irqfd = dev->udev.irqfd[index];
|
||||
int irq = dev->msix_data[index];
|
||||
uint64_t v;
|
||||
int r;
|
||||
|
||||
do {
|
||||
r = os_read_file(irqfd, &v, sizeof(v));
|
||||
if (r == sizeof(v))
|
||||
generic_handle_irq(irq);
|
||||
} while (r == sizeof(v) || r == -EINTR);
|
||||
WARN(r != -EAGAIN, "read returned %d\n", r);
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static int uml_vfio_activate_irq(struct uml_vfio_device *dev, int index)
|
||||
{
|
||||
struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
|
||||
int err, irqfd;
|
||||
|
||||
if (ctx->irq >= 0)
|
||||
return 0;
|
||||
|
||||
irqfd = uml_vfio_user_activate_irq(&dev->udev, index);
|
||||
if (irqfd < 0)
|
||||
return irqfd;
|
||||
|
||||
ctx->irq = um_request_irq(UM_IRQ_ALLOC, irqfd, IRQ_READ,
|
||||
uml_vfio_interrupt, 0,
|
||||
"vfio-uml", ctx);
|
||||
if (ctx->irq < 0) {
|
||||
err = ctx->irq;
|
||||
goto deactivate;
|
||||
}
|
||||
|
||||
err = add_sigio_fd(irqfd);
|
||||
if (err)
|
||||
goto free_irq;
|
||||
|
||||
return 0;
|
||||
|
||||
free_irq:
|
||||
um_free_irq(ctx->irq, ctx);
|
||||
ctx->irq = -1;
|
||||
deactivate:
|
||||
uml_vfio_user_deactivate_irq(&dev->udev, index);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int uml_vfio_deactivate_irq(struct uml_vfio_device *dev, int index)
|
||||
{
|
||||
struct uml_vfio_intr_ctx *ctx = &dev->intr_ctx[index];
|
||||
|
||||
if (ctx->irq >= 0) {
|
||||
ignore_sigio_fd(dev->udev.irqfd[index]);
|
||||
um_free_irq(ctx->irq, ctx);
|
||||
uml_vfio_user_deactivate_irq(&dev->udev, index);
|
||||
ctx->irq = -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int uml_vfio_update_msix_cap(struct uml_vfio_device *dev,
|
||||
unsigned int offset, int size,
|
||||
unsigned long val)
|
||||
{
|
||||
/*
|
||||
* Here, we handle only the operations we care about,
|
||||
* ignoring the rest.
|
||||
*/
|
||||
if (size == 2 && offset == dev->msix_cap + PCI_MSIX_FLAGS) {
|
||||
switch (val & ~PCI_MSIX_FLAGS_QSIZE) {
|
||||
case PCI_MSIX_FLAGS_ENABLE:
|
||||
case 0:
|
||||
return uml_vfio_user_update_irqs(&dev->udev);
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int uml_vfio_update_msix_table(struct uml_vfio_device *dev,
|
||||
unsigned int offset, int size,
|
||||
unsigned long val)
|
||||
{
|
||||
int index;
|
||||
|
||||
/*
|
||||
* Here, we handle only the operations we care about,
|
||||
* ignoring the rest.
|
||||
*/
|
||||
offset -= dev->msix_offset + PCI_MSIX_ENTRY_DATA;
|
||||
|
||||
if (size != 4 || offset % PCI_MSIX_ENTRY_SIZE != 0)
|
||||
return 0;
|
||||
|
||||
index = offset / PCI_MSIX_ENTRY_SIZE;
|
||||
if (index >= dev->udev.irq_count)
|
||||
return -EINVAL;
|
||||
|
||||
dev->msix_data[index] = val;
|
||||
|
||||
return val ? uml_vfio_activate_irq(dev, index) :
|
||||
uml_vfio_deactivate_irq(dev, index);
|
||||
}
|
||||
|
||||
static unsigned long __uml_vfio_cfgspace_read(struct uml_vfio_device *dev,
|
||||
unsigned int offset, int size)
|
||||
{
|
||||
u8 data[8];
|
||||
|
||||
memset(data, 0xff, sizeof(data));
|
||||
|
||||
if (uml_vfio_user_cfgspace_read(&dev->udev, offset, data, size))
|
||||
return ULONG_MAX;
|
||||
|
||||
switch (size) {
|
||||
case 1:
|
||||
return data[0];
|
||||
case 2:
|
||||
return le16_to_cpup((void *)data);
|
||||
case 4:
|
||||
return le32_to_cpup((void *)data);
|
||||
#ifdef CONFIG_64BIT
|
||||
case 8:
|
||||
return le64_to_cpup((void *)data);
|
||||
#endif
|
||||
default:
|
||||
return ULONG_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long uml_vfio_cfgspace_read(struct um_pci_device *pdev,
|
||||
unsigned int offset, int size)
|
||||
{
|
||||
struct uml_vfio_device *dev = to_vdev(pdev);
|
||||
|
||||
return __uml_vfio_cfgspace_read(dev, offset, size);
|
||||
}
|
||||
|
||||
static void __uml_vfio_cfgspace_write(struct uml_vfio_device *dev,
|
||||
unsigned int offset, int size,
|
||||
unsigned long val)
|
||||
{
|
||||
u8 data[8];
|
||||
|
||||
switch (size) {
|
||||
case 1:
|
||||
data[0] = (u8)val;
|
||||
break;
|
||||
case 2:
|
||||
put_unaligned_le16(val, (void *)data);
|
||||
break;
|
||||
case 4:
|
||||
put_unaligned_le32(val, (void *)data);
|
||||
break;
|
||||
#ifdef CONFIG_64BIT
|
||||
case 8:
|
||||
put_unaligned_le64(val, (void *)data);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
WARN_ON(uml_vfio_user_cfgspace_write(&dev->udev, offset, data, size));
|
||||
}
|
||||
|
||||
static void uml_vfio_cfgspace_write(struct um_pci_device *pdev,
|
||||
unsigned int offset, int size,
|
||||
unsigned long val)
|
||||
{
|
||||
struct uml_vfio_device *dev = to_vdev(pdev);
|
||||
|
||||
if (offset < dev->msix_cap + PCI_CAP_MSIX_SIZEOF &&
|
||||
offset + size > dev->msix_cap)
|
||||
WARN_ON(uml_vfio_update_msix_cap(dev, offset, size, val));
|
||||
|
||||
__uml_vfio_cfgspace_write(dev, offset, size, val);
|
||||
}
|
||||
|
||||
static void uml_vfio_bar_copy_from(struct um_pci_device *pdev, int bar,
|
||||
void *buffer, unsigned int offset, int size)
|
||||
{
|
||||
struct uml_vfio_device *dev = to_vdev(pdev);
|
||||
|
||||
memset(buffer, 0xff, size);
|
||||
uml_vfio_user_bar_read(&dev->udev, bar, offset, buffer, size);
|
||||
}
|
||||
|
||||
static unsigned long uml_vfio_bar_read(struct um_pci_device *pdev, int bar,
|
||||
unsigned int offset, int size)
|
||||
{
|
||||
u8 data[8];
|
||||
|
||||
uml_vfio_bar_copy_from(pdev, bar, data, offset, size);
|
||||
|
||||
switch (size) {
|
||||
case 1:
|
||||
return data[0];
|
||||
case 2:
|
||||
return le16_to_cpup((void *)data);
|
||||
case 4:
|
||||
return le32_to_cpup((void *)data);
|
||||
#ifdef CONFIG_64BIT
|
||||
case 8:
|
||||
return le64_to_cpup((void *)data);
|
||||
#endif
|
||||
default:
|
||||
return ULONG_MAX;
|
||||
}
|
||||
}
|
||||
|
||||
static void uml_vfio_bar_copy_to(struct um_pci_device *pdev, int bar,
|
||||
unsigned int offset, const void *buffer,
|
||||
int size)
|
||||
{
|
||||
struct uml_vfio_device *dev = to_vdev(pdev);
|
||||
|
||||
uml_vfio_user_bar_write(&dev->udev, bar, offset, buffer, size);
|
||||
}
|
||||
|
||||
static void uml_vfio_bar_write(struct um_pci_device *pdev, int bar,
|
||||
unsigned int offset, int size,
|
||||
unsigned long val)
|
||||
{
|
||||
struct uml_vfio_device *dev = to_vdev(pdev);
|
||||
u8 data[8];
|
||||
|
||||
if (bar == dev->msix_bar && offset + size > dev->msix_offset &&
|
||||
offset < dev->msix_offset + dev->msix_size)
|
||||
WARN_ON(uml_vfio_update_msix_table(dev, offset, size, val));
|
||||
|
||||
switch (size) {
|
||||
case 1:
|
||||
data[0] = (u8)val;
|
||||
break;
|
||||
case 2:
|
||||
put_unaligned_le16(val, (void *)data);
|
||||
break;
|
||||
case 4:
|
||||
put_unaligned_le32(val, (void *)data);
|
||||
break;
|
||||
#ifdef CONFIG_64BIT
|
||||
case 8:
|
||||
put_unaligned_le64(val, (void *)data);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
||||
uml_vfio_bar_copy_to(pdev, bar, offset, data, size);
|
||||
}
|
||||
|
||||
static void uml_vfio_bar_set(struct um_pci_device *pdev, int bar,
|
||||
unsigned int offset, u8 value, int size)
|
||||
{
|
||||
struct uml_vfio_device *dev = to_vdev(pdev);
|
||||
int i;
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
uml_vfio_user_bar_write(&dev->udev, bar, offset + i, &value, 1);
|
||||
}
|
||||
|
||||
static const struct um_pci_ops uml_vfio_um_pci_ops = {
|
||||
.cfgspace_read = uml_vfio_cfgspace_read,
|
||||
.cfgspace_write = uml_vfio_cfgspace_write,
|
||||
.bar_read = uml_vfio_bar_read,
|
||||
.bar_write = uml_vfio_bar_write,
|
||||
.bar_copy_from = uml_vfio_bar_copy_from,
|
||||
.bar_copy_to = uml_vfio_bar_copy_to,
|
||||
.bar_set = uml_vfio_bar_set,
|
||||
};
|
||||
|
||||
static u8 uml_vfio_find_capability(struct uml_vfio_device *dev, u8 cap)
|
||||
{
|
||||
u8 id, pos;
|
||||
u16 ent;
|
||||
int ttl = 48; /* PCI_FIND_CAP_TTL */
|
||||
|
||||
pos = __uml_vfio_cfgspace_read(dev, PCI_CAPABILITY_LIST, sizeof(pos));
|
||||
|
||||
while (pos && ttl--) {
|
||||
ent = __uml_vfio_cfgspace_read(dev, pos, sizeof(ent));
|
||||
|
||||
id = ent & 0xff;
|
||||
if (id == 0xff)
|
||||
break;
|
||||
if (id == cap)
|
||||
return pos;
|
||||
|
||||
pos = ent >> 8;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int uml_vfio_read_msix_table(struct uml_vfio_device *dev)
|
||||
{
|
||||
unsigned int off;
|
||||
u16 flags;
|
||||
u32 tbl;
|
||||
|
||||
off = uml_vfio_find_capability(dev, PCI_CAP_ID_MSIX);
|
||||
if (!off)
|
||||
return -ENOTSUPP;
|
||||
|
||||
dev->msix_cap = off;
|
||||
|
||||
tbl = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_TABLE, sizeof(tbl));
|
||||
flags = __uml_vfio_cfgspace_read(dev, off + PCI_MSIX_FLAGS, sizeof(flags));
|
||||
|
||||
dev->msix_bar = tbl & PCI_MSIX_TABLE_BIR;
|
||||
dev->msix_offset = tbl & PCI_MSIX_TABLE_OFFSET;
|
||||
dev->msix_size = ((flags & PCI_MSIX_FLAGS_QSIZE) + 1) * PCI_MSIX_ENTRY_SIZE;
|
||||
|
||||
dev->msix_data = kzalloc(dev->msix_size, GFP_KERNEL);
|
||||
if (!dev->msix_data)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void uml_vfio_open_device(struct uml_vfio_device *dev)
|
||||
{
|
||||
struct uml_vfio_intr_ctx *ctx;
|
||||
int err, group_id, i;
|
||||
|
||||
group_id = uml_vfio_user_get_group_id(dev->name);
|
||||
if (group_id < 0) {
|
||||
pr_err("Failed to get group id (%s), error %d\n",
|
||||
dev->name, group_id);
|
||||
goto free_dev;
|
||||
}
|
||||
|
||||
dev->group = uml_vfio_open_group(group_id);
|
||||
if (dev->group < 0) {
|
||||
pr_err("Failed to open group %d (%s), error %d\n",
|
||||
group_id, dev->name, dev->group);
|
||||
goto free_dev;
|
||||
}
|
||||
|
||||
err = uml_vfio_user_setup_device(&dev->udev, dev->group, dev->name);
|
||||
if (err) {
|
||||
pr_err("Failed to setup device (%s), error %d\n",
|
||||
dev->name, err);
|
||||
goto release_group;
|
||||
}
|
||||
|
||||
err = uml_vfio_read_msix_table(dev);
|
||||
if (err) {
|
||||
pr_err("Failed to read MSI-X table (%s), error %d\n",
|
||||
dev->name, err);
|
||||
goto teardown_udev;
|
||||
}
|
||||
|
||||
dev->intr_ctx = kmalloc_array(dev->udev.irq_count,
|
||||
sizeof(struct uml_vfio_intr_ctx),
|
||||
GFP_KERNEL);
|
||||
if (!dev->intr_ctx) {
|
||||
pr_err("Failed to allocate interrupt context (%s)\n",
|
||||
dev->name);
|
||||
goto free_msix;
|
||||
}
|
||||
|
||||
for (i = 0; i < dev->udev.irq_count; i++) {
|
||||
ctx = &dev->intr_ctx[i];
|
||||
ctx->dev = dev;
|
||||
ctx->irq = -1;
|
||||
}
|
||||
|
||||
dev->pdev.ops = ¨_vfio_um_pci_ops;
|
||||
|
||||
err = um_pci_device_register(&dev->pdev);
|
||||
if (err) {
|
||||
pr_err("Failed to register UM PCI device (%s), error %d\n",
|
||||
dev->name, err);
|
||||
goto free_intr_ctx;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
free_intr_ctx:
|
||||
kfree(dev->intr_ctx);
|
||||
free_msix:
|
||||
kfree(dev->msix_data);
|
||||
teardown_udev:
|
||||
uml_vfio_user_teardown_device(&dev->udev);
|
||||
release_group:
|
||||
uml_vfio_release_group(dev->group);
|
||||
free_dev:
|
||||
list_del(&dev->list);
|
||||
kfree(dev->name);
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
static void uml_vfio_release_device(struct uml_vfio_device *dev)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < dev->udev.irq_count; i++)
|
||||
uml_vfio_deactivate_irq(dev, i);
|
||||
uml_vfio_user_update_irqs(&dev->udev);
|
||||
|
||||
um_pci_device_unregister(&dev->pdev);
|
||||
kfree(dev->intr_ctx);
|
||||
kfree(dev->msix_data);
|
||||
uml_vfio_user_teardown_device(&dev->udev);
|
||||
uml_vfio_release_group(dev->group);
|
||||
list_del(&dev->list);
|
||||
kfree(dev->name);
|
||||
kfree(dev);
|
||||
}
|
||||
|
||||
static int uml_vfio_cmdline_set(const char *device, const struct kernel_param *kp)
{
	struct uml_vfio_device *dev;
	int fd;

	if (uml_vfio_container.fd < 0) {
		fd = uml_vfio_user_open_container();
		if (fd < 0)
			return fd;
		uml_vfio_container.fd = fd;
	}

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return -ENOMEM;

	dev->name = kstrdup(device, GFP_KERNEL);
	if (!dev->name) {
		kfree(dev);
		return -ENOMEM;
	}

	list_add_tail(&dev->list, &uml_vfio_devices);
	return 0;
}

static int uml_vfio_cmdline_get(char *buffer, const struct kernel_param *kp)
{
	return 0;
}

static const struct kernel_param_ops uml_vfio_cmdline_param_ops = {
	.set = uml_vfio_cmdline_set,
	.get = uml_vfio_cmdline_get,
};

device_param_cb(device, &uml_vfio_cmdline_param_ops, NULL, 0400);
__uml_help(uml_vfio_cmdline_param_ops,
"vfio_uml.device=<domain:bus:slot.function>\n"
" Pass through a PCI device to UML via VFIO. Currently, only MSI-X\n"
" capable devices are supported, and it is assumed that drivers will\n"
" use MSI-X. This parameter can be specified multiple times to pass\n"
" through multiple PCI devices to UML.\n\n"
);
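
/*
 * Example usage (illustrative only, not part of the driver): with a host
 * device at the hypothetical address 0000:01:00.0 already bound to
 * vfio-pci, it could be handed to UML on the kernel command line roughly
 * like:
 *
 *   ./linux mem=1G ubd0=rootfs.img vfio_uml.device=0000:01:00.0
 *
 * Repeating the parameter passes through additional devices.
 */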

static int __init uml_vfio_init(void)
{
	struct uml_vfio_device *dev, *n;

	sigio_broken();

	/* If the opening fails, the device will be released. */
	list_for_each_entry_safe(dev, n, &uml_vfio_devices, list)
		uml_vfio_open_device(dev);

	return 0;
}
late_initcall(uml_vfio_init);

static void __exit uml_vfio_exit(void)
{
	struct uml_vfio_device *dev, *n;

	list_for_each_entry_safe(dev, n, &uml_vfio_devices, list)
		uml_vfio_release_device(dev);

	if (uml_vfio_container.fd >= 0)
		os_close_file(uml_vfio_container.fd);
}
module_exit(uml_vfio_exit);

arch/um/drivers/vfio_user.c (new file, 327 lines)

@@ -0,0 +1,327 @@
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2025 Ant Group
 * Author: Tiwei Bie <tiwei.btw@antgroup.com>
 */
#include <errno.h>
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/eventfd.h>
#include <linux/limits.h>
#include <linux/vfio.h>
#include <linux/pci_regs.h>
#include <as-layout.h>
#include <um_malloc.h>

#include "vfio_user.h"

int uml_vfio_user_open_container(void)
{
	int r, fd;

	fd = open("/dev/vfio/vfio", O_RDWR);
	if (fd < 0)
		return -errno;

	r = ioctl(fd, VFIO_GET_API_VERSION);
	if (r != VFIO_API_VERSION) {
		r = r < 0 ? -errno : -EINVAL;
		goto error;
	}

	r = ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1_IOMMU);
	if (r <= 0) {
		r = r < 0 ? -errno : -EINVAL;
		goto error;
	}

	return fd;

error:
	close(fd);
	return r;
}
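
/*
 * The container FD returned above follows the usual VFIO type1 flow: IOMMU
 * groups are attached to it with VFIO_GROUP_SET_CONTAINER, and
 * uml_vfio_user_setup_iommu() below selects VFIO_TYPE1_IOMMU and maps UML's
 * physical memory range for DMA with VFIO_IOMMU_MAP_DMA.
 */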
|
||||
|
||||
int uml_vfio_user_setup_iommu(int container)
|
||||
{
|
||||
/*
|
||||
* This is a bit tricky. See the big comment in
|
||||
* vhost_user_set_mem_table() in virtio_uml.c.
|
||||
*/
|
||||
unsigned long reserved = uml_reserved - uml_physmem;
|
||||
struct vfio_iommu_type1_dma_map dma_map = {
|
||||
.argsz = sizeof(dma_map),
|
||||
.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
|
||||
.vaddr = uml_reserved,
|
||||
.iova = reserved,
|
||||
.size = physmem_size - reserved,
|
||||
};
|
||||
|
||||
if (ioctl(container, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU) < 0)
|
||||
return -errno;
|
||||
|
||||
if (ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map) < 0)
|
||||
return -errno;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int uml_vfio_user_get_group_id(const char *device)
|
||||
{
|
||||
char *path, *buf, *end;
|
||||
const char *name;
|
||||
int r;
|
||||
|
||||
path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
sprintf(path, "/sys/bus/pci/devices/%s/iommu_group", device);
|
||||
|
||||
buf = uml_kmalloc(PATH_MAX + 1, UM_GFP_KERNEL);
|
||||
if (!buf) {
|
||||
r = -ENOMEM;
|
||||
goto free_path;
|
||||
}
|
||||
|
||||
r = readlink(path, buf, PATH_MAX);
|
||||
if (r < 0) {
|
||||
r = -errno;
|
||||
goto free_buf;
|
||||
}
|
||||
buf[r] = '\0';
|
||||
|
||||
name = basename(buf);
|
||||
|
||||
r = strtoul(name, &end, 10);
|
||||
if (*end != '\0' || end == name) {
|
||||
r = -EINVAL;
|
||||
goto free_buf;
|
||||
}
|
||||
|
||||
free_buf:
|
||||
kfree(buf);
|
||||
free_path:
|
||||
kfree(path);
|
||||
return r;
|
||||
}
|
||||
|
||||
int uml_vfio_user_open_group(int group_id)
|
||||
{
|
||||
char *path;
|
||||
int fd;
|
||||
|
||||
path = uml_kmalloc(PATH_MAX, UM_GFP_KERNEL);
|
||||
if (!path)
|
||||
return -ENOMEM;
|
||||
|
||||
sprintf(path, "/dev/vfio/%d", group_id);
|
||||
|
||||
fd = open(path, O_RDWR);
|
||||
if (fd < 0) {
|
||||
fd = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
kfree(path);
|
||||
return fd;
|
||||
}
|
||||
|
||||
int uml_vfio_user_set_container(int container, int group)
|
||||
{
|
||||
if (ioctl(group, VFIO_GROUP_SET_CONTAINER, &container) < 0)
|
||||
return -errno;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int uml_vfio_user_unset_container(int container, int group)
|
||||
{
|
||||
if (ioctl(group, VFIO_GROUP_UNSET_CONTAINER, &container) < 0)
|
||||
return -errno;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vfio_set_irqs(int device, int start, int count, int *irqfd)
|
||||
{
|
||||
struct vfio_irq_set *irq_set;
|
||||
int argsz = sizeof(*irq_set) + sizeof(*irqfd) * count;
|
||||
int err = 0;
|
||||
|
||||
irq_set = uml_kmalloc(argsz, UM_GFP_KERNEL);
|
||||
if (!irq_set)
|
||||
return -ENOMEM;
|
||||
|
||||
irq_set->argsz = argsz;
|
||||
irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER;
|
||||
irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX;
|
||||
irq_set->start = start;
|
||||
irq_set->count = count;
|
||||
memcpy(irq_set->data, irqfd, sizeof(*irqfd) * count);
|
||||
|
||||
if (ioctl(device, VFIO_DEVICE_SET_IRQS, irq_set) < 0) {
|
||||
err = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
kfree(irq_set);
|
||||
return err;
|
||||
}
|
||||
|
||||
int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
|
||||
int group, const char *device)
|
||||
{
|
||||
struct vfio_device_info device_info = { .argsz = sizeof(device_info) };
|
||||
struct vfio_irq_info irq_info = { .argsz = sizeof(irq_info) };
|
||||
int err, i;
|
||||
|
||||
dev->device = ioctl(group, VFIO_GROUP_GET_DEVICE_FD, device);
|
||||
if (dev->device < 0)
|
||||
return -errno;
|
||||
|
||||
if (ioctl(dev->device, VFIO_DEVICE_GET_INFO, &device_info) < 0) {
|
||||
err = -errno;
|
||||
goto close_device;
|
||||
}
|
||||
|
||||
dev->num_regions = device_info.num_regions;
|
||||
if (dev->num_regions > VFIO_PCI_CONFIG_REGION_INDEX + 1)
|
||||
dev->num_regions = VFIO_PCI_CONFIG_REGION_INDEX + 1;
|
||||
|
||||
dev->region = uml_kmalloc(sizeof(*dev->region) * dev->num_regions,
|
||||
UM_GFP_KERNEL);
|
||||
if (!dev->region) {
|
||||
err = -ENOMEM;
|
||||
goto close_device;
|
||||
}
|
||||
|
||||
for (i = 0; i < dev->num_regions; i++) {
|
||||
struct vfio_region_info region = {
|
||||
.argsz = sizeof(region),
|
||||
.index = i,
|
||||
};
|
||||
if (ioctl(dev->device, VFIO_DEVICE_GET_REGION_INFO, ®ion) < 0) {
|
||||
err = -errno;
|
||||
goto free_region;
|
||||
}
|
||||
dev->region[i].size = region.size;
|
||||
dev->region[i].offset = region.offset;
|
||||
}
|
||||
|
||||
/* Only MSI-X is supported currently. */
|
||||
irq_info.index = VFIO_PCI_MSIX_IRQ_INDEX;
|
||||
if (ioctl(dev->device, VFIO_DEVICE_GET_IRQ_INFO, &irq_info) < 0) {
|
||||
err = -errno;
|
||||
goto free_region;
|
||||
}
|
||||
|
||||
dev->irq_count = irq_info.count;
|
||||
|
||||
dev->irqfd = uml_kmalloc(sizeof(int) * dev->irq_count, UM_GFP_KERNEL);
|
||||
if (!dev->irqfd) {
|
||||
err = -ENOMEM;
|
||||
goto free_region;
|
||||
}
|
||||
|
||||
memset(dev->irqfd, -1, sizeof(int) * dev->irq_count);
|
||||
|
||||
err = vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
|
||||
if (err)
|
||||
goto free_irqfd;
|
||||
|
||||
return 0;
|
||||
|
||||
free_irqfd:
|
||||
kfree(dev->irqfd);
|
||||
free_region:
|
||||
kfree(dev->region);
|
||||
close_device:
|
||||
close(dev->device);
|
||||
return err;
|
||||
}
|
||||
|
||||
void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev)
|
||||
{
|
||||
kfree(dev->irqfd);
|
||||
kfree(dev->region);
|
||||
close(dev->device);
|
||||
}
|
||||
|
||||
int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index)
|
||||
{
|
||||
int irqfd;
|
||||
|
||||
irqfd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
|
||||
if (irqfd < 0)
|
||||
return -errno;
|
||||
|
||||
dev->irqfd[index] = irqfd;
|
||||
return irqfd;
|
||||
}
|
||||
|
||||
void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index)
|
||||
{
|
||||
close(dev->irqfd[index]);
|
||||
dev->irqfd[index] = -1;
|
||||
}
|
||||
|
||||
int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev)
|
||||
{
|
||||
return vfio_set_irqs(dev->device, 0, dev->irq_count, dev->irqfd);
|
||||
}
|
||||
|
||||
static int vfio_region_read(struct uml_vfio_user_device *dev, unsigned int index,
|
||||
uint64_t offset, void *buf, uint64_t size)
|
||||
{
|
||||
if (index >= dev->num_regions || offset + size > dev->region[index].size)
|
||||
return -EINVAL;
|
||||
|
||||
if (pread(dev->device, buf, size, dev->region[index].offset + offset) < 0)
|
||||
return -errno;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vfio_region_write(struct uml_vfio_user_device *dev, unsigned int index,
|
||||
uint64_t offset, const void *buf, uint64_t size)
|
||||
{
|
||||
if (index >= dev->num_regions || offset + size > dev->region[index].size)
|
||||
return -EINVAL;
|
||||
|
||||
if (pwrite(dev->device, buf, size, dev->region[index].offset + offset) < 0)
|
||||
return -errno;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
|
||||
unsigned int offset, void *buf, int size)
|
||||
{
|
||||
return vfio_region_read(dev, VFIO_PCI_CONFIG_REGION_INDEX,
|
||||
offset, buf, size);
|
||||
}
|
||||
|
||||
int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
|
||||
unsigned int offset, const void *buf, int size)
|
||||
{
|
||||
return vfio_region_write(dev, VFIO_PCI_CONFIG_REGION_INDEX,
|
||||
offset, buf, size);
|
||||
}
|
||||
|
||||
int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
|
||||
unsigned int offset, void *buf, int size)
|
||||
{
|
||||
return vfio_region_read(dev, bar, offset, buf, size);
|
||||
}
|
||||
|
||||
int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
|
||||
unsigned int offset, const void *buf, int size)
|
||||
{
|
||||
return vfio_region_write(dev, bar, offset, buf, size);
|
||||
}
|
arch/um/drivers/vfio_user.h (new file, 44 lines)

@@ -0,0 +1,44 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __UM_VFIO_USER_H
#define __UM_VFIO_USER_H

struct uml_vfio_user_device {
	int device;

	struct {
		uint64_t size;
		uint64_t offset;
	} *region;
	int num_regions;

	int32_t *irqfd;
	int irq_count;
};

int uml_vfio_user_open_container(void);
int uml_vfio_user_setup_iommu(int container);

int uml_vfio_user_get_group_id(const char *device);
int uml_vfio_user_open_group(int group_id);
int uml_vfio_user_set_container(int container, int group);
int uml_vfio_user_unset_container(int container, int group);

int uml_vfio_user_setup_device(struct uml_vfio_user_device *dev,
			       int group, const char *device);
void uml_vfio_user_teardown_device(struct uml_vfio_user_device *dev);

int uml_vfio_user_activate_irq(struct uml_vfio_user_device *dev, int index);
void uml_vfio_user_deactivate_irq(struct uml_vfio_user_device *dev, int index);
int uml_vfio_user_update_irqs(struct uml_vfio_user_device *dev);

int uml_vfio_user_cfgspace_read(struct uml_vfio_user_device *dev,
				unsigned int offset, void *buf, int size);
int uml_vfio_user_cfgspace_write(struct uml_vfio_user_device *dev,
				 unsigned int offset, const void *buf, int size);

int uml_vfio_user_bar_read(struct uml_vfio_user_device *dev, int bar,
			   unsigned int offset, void *buf, int size);
int uml_vfio_user_bar_write(struct uml_vfio_user_device *dev, int bar,
			    unsigned int offset, const void *buf, int size);

#endif /* __UM_VFIO_USER_H */
|
|
@ -538,11 +538,6 @@ void um_pci_platform_device_unregister(struct um_pci_device *dev)
|
|||
|
||||
static int __init um_pci_init(void)
|
||||
{
|
||||
struct irq_domain_info inner_domain_info = {
|
||||
.size = MAX_MSI_VECTORS,
|
||||
.hwirq_max = MAX_MSI_VECTORS,
|
||||
.ops = &um_pci_inner_domain_ops,
|
||||
};
|
||||
int err, i;
|
||||
|
||||
WARN_ON(logic_iomem_add_region(&virt_cfgspace_resource,
|
||||
|
@ -564,10 +559,10 @@ static int __init um_pci_init(void)
|
|||
goto free;
|
||||
}
|
||||
|
||||
inner_domain_info.fwnode = um_pci_fwnode;
|
||||
um_pci_inner_domain = irq_domain_instantiate(&inner_domain_info);
|
||||
if (IS_ERR(um_pci_inner_domain)) {
|
||||
err = PTR_ERR(um_pci_inner_domain);
|
||||
um_pci_inner_domain = irq_domain_create_linear(um_pci_fwnode, MAX_MSI_VECTORS,
|
||||
&um_pci_inner_domain_ops, NULL);
|
||||
if (!um_pci_inner_domain) {
|
||||
err = -ENOMEM;
|
||||
goto free;
|
||||
}
|
||||
|
||||
|
@ -602,7 +597,7 @@ static int __init um_pci_init(void)
|
|||
return 0;
|
||||
|
||||
free:
|
||||
if (!IS_ERR_OR_NULL(um_pci_inner_domain))
|
||||
if (um_pci_inner_domain)
|
||||
irq_domain_remove(um_pci_inner_domain);
|
||||
if (um_pci_fwnode)
|
||||
irq_domain_free_fwnode(um_pci_fwnode);
|
||||
|
|
|
@@ -81,7 +81,7 @@ __uml_setup("xterm=", xterm_setup,
 " '<switch> command arg1 arg2 ...'.\n"
 " The default values are 'xterm=" CONFIG_XTERM_CHAN_DEFAULT_EMULATOR
 ",-T,-e'.\n"
-" Values for gnome-terminal are 'xterm=gnome-terminal,-t,-x'.\n\n"
+" Values for gnome-terminal are 'xterm=gnome-terminal,-t,--'.\n\n"
 );
 
 static int xterm_open(int input, int output, int primary, void *d,
@@ -97,12 +97,9 @@ static int xterm_open(int input, int output, int primary, void *d,
 	if (access(argv[4], X_OK) < 0)
 		argv[4] = "port-helper";
 
-	/*
-	 * Check that DISPLAY is set, this doesn't guarantee the xterm
-	 * will work but w/o it we can be pretty sure it won't.
-	 */
-	if (getenv("DISPLAY") == NULL) {
-		printk(UM_KERN_ERR "xterm_open: $DISPLAY not set.\n");
+	/* Ensure we are running on Xorg or Wayland. */
+	if (!getenv("DISPLAY") && !getenv("WAYLAND_DISPLAY")) {
+		printk(UM_KERN_ERR "xterm_open : neither $DISPLAY nor $WAYLAND_DISPLAY is set.\n");
 		return -ENODEV;
 	}
 
|
||||
|
|
|
@ -1 +1,6 @@
|
|||
#include <asm-generic/asm-prototypes.h>
|
||||
#include <asm/checksum.h>
|
||||
|
||||
#ifdef CONFIG_UML_X86
|
||||
extern void cmpxchg8b_emu(void);
|
||||
#endif
|
||||
|
|
|
@ -13,17 +13,18 @@
|
|||
#define TELNETD_IRQ 8
|
||||
#define XTERM_IRQ 9
|
||||
#define RANDOM_IRQ 10
|
||||
#define SIGCHLD_IRQ 11
|
||||
|
||||
#ifdef CONFIG_UML_NET_VECTOR
|
||||
|
||||
#define VECTOR_BASE_IRQ (RANDOM_IRQ + 1)
|
||||
#define VECTOR_BASE_IRQ (SIGCHLD_IRQ + 1)
|
||||
#define VECTOR_IRQ_SPACE 8
|
||||
|
||||
#define UM_FIRST_DYN_IRQ (VECTOR_IRQ_SPACE + VECTOR_BASE_IRQ)
|
||||
|
||||
#else
|
||||
|
||||
#define UM_FIRST_DYN_IRQ (RANDOM_IRQ + 1)
|
||||
#define UM_FIRST_DYN_IRQ (SIGCHLD_IRQ + 1)
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -6,11 +6,14 @@
|
|||
#ifndef __ARCH_UM_MMU_H
|
||||
#define __ARCH_UM_MMU_H
|
||||
|
||||
#include "linux/types.h"
|
||||
#include <mm_id.h>
|
||||
|
||||
typedef struct mm_context {
|
||||
struct mm_id id;
|
||||
|
||||
struct list_head list;
|
||||
|
||||
/* Address range in need of a TLB sync */
|
||||
unsigned long sync_tlb_range_from;
|
||||
unsigned long sync_tlb_range_to;
|
||||
|
|
|
@ -14,3 +14,7 @@ DEFINE(UM_THREAD_SIZE, THREAD_SIZE);
|
|||
|
||||
DEFINE(UM_NSEC_PER_SEC, NSEC_PER_SEC);
|
||||
DEFINE(UM_NSEC_PER_USEC, NSEC_PER_USEC);
|
||||
|
||||
DEFINE(UM_KERN_GDT_ENTRY_TLS_ENTRIES, GDT_ENTRY_TLS_ENTRIES);
|
||||
|
||||
DEFINE(UM_SECCOMP_ARCH_NATIVE, SECCOMP_ARCH_NATIVE);
|
||||
|
|
|
@ -17,6 +17,8 @@ enum um_irq_type {
|
|||
struct siginfo;
|
||||
extern void sigio_handler(int sig, struct siginfo *unused_si,
|
||||
struct uml_pt_regs *regs, void *mc);
|
||||
extern void sigchld_handler(int sig, struct siginfo *unused_si,
|
||||
struct uml_pt_regs *regs, void *mc);
|
||||
void sigio_run_timetravel_handlers(void);
|
||||
extern void free_irq_by_fd(int fd);
|
||||
extern void deactivate_fd(int fd, int irqnum);
|
||||
|
|
|
@ -1,69 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2002 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#ifndef __UM_NET_KERN_H
|
||||
#define __UM_NET_KERN_H
|
||||
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/platform_device.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <linux/socket.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/workqueue.h>
|
||||
|
||||
struct uml_net {
|
||||
struct list_head list;
|
||||
struct net_device *dev;
|
||||
struct platform_device pdev;
|
||||
int index;
|
||||
};
|
||||
|
||||
struct uml_net_private {
|
||||
struct list_head list;
|
||||
spinlock_t lock;
|
||||
struct net_device *dev;
|
||||
struct timer_list tl;
|
||||
|
||||
struct work_struct work;
|
||||
int fd;
|
||||
unsigned char mac[ETH_ALEN];
|
||||
int max_packet;
|
||||
unsigned short (*protocol)(struct sk_buff *);
|
||||
int (*open)(void *);
|
||||
void (*close)(int, void *);
|
||||
void (*remove)(void *);
|
||||
int (*read)(int, struct sk_buff *skb, struct uml_net_private *);
|
||||
int (*write)(int, struct sk_buff *skb, struct uml_net_private *);
|
||||
|
||||
void (*add_address)(unsigned char *, unsigned char *, void *);
|
||||
void (*delete_address)(unsigned char *, unsigned char *, void *);
|
||||
char user[];
|
||||
};
|
||||
|
||||
struct net_kern_info {
|
||||
void (*init)(struct net_device *, void *);
|
||||
unsigned short (*protocol)(struct sk_buff *);
|
||||
int (*read)(int, struct sk_buff *skb, struct uml_net_private *);
|
||||
int (*write)(int, struct sk_buff *skb, struct uml_net_private *);
|
||||
};
|
||||
|
||||
struct transport {
|
||||
struct list_head list;
|
||||
const char *name;
|
||||
int (* const setup)(char *, char **, void *);
|
||||
const struct net_user_info *user;
|
||||
const struct net_kern_info *kern;
|
||||
const int private_size;
|
||||
const int setup_size;
|
||||
};
|
||||
|
||||
extern int tap_setup_common(char *str, char *type, char **dev_name,
|
||||
char **mac_out, char **gate_addr);
|
||||
extern void register_transport(struct transport *new);
|
||||
extern unsigned short eth_protocol(struct sk_buff *skb);
|
||||
extern void uml_net_setup_etheraddr(struct net_device *dev, char *str);
|
||||
|
||||
|
||||
#endif
|
|
@ -1,52 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2002 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#ifndef __UM_NET_USER_H__
|
||||
#define __UM_NET_USER_H__
|
||||
|
||||
#define ETH_ADDR_LEN (6)
|
||||
#define ETH_HEADER_ETHERTAP (16)
|
||||
#define ETH_HEADER_OTHER (26) /* 14 for ethernet + VLAN + MPLS for crazy people */
|
||||
#define ETH_MAX_PACKET (1500)
|
||||
|
||||
#define UML_NET_VERSION (4)
|
||||
|
||||
struct net_user_info {
|
||||
int (*init)(void *, void *);
|
||||
int (*open)(void *);
|
||||
void (*close)(int, void *);
|
||||
void (*remove)(void *);
|
||||
void (*add_address)(unsigned char *, unsigned char *, void *);
|
||||
void (*delete_address)(unsigned char *, unsigned char *, void *);
|
||||
int max_packet;
|
||||
int mtu;
|
||||
};
|
||||
|
||||
extern void iter_addresses(void *d, void (*cb)(unsigned char *,
|
||||
unsigned char *, void *),
|
||||
void *arg);
|
||||
|
||||
extern void *get_output_buffer(int *len_out);
|
||||
extern void free_output_buffer(void *buffer);
|
||||
|
||||
extern int tap_open_common(void *dev, char *gate_addr);
|
||||
extern void tap_check_ips(char *gate_addr, unsigned char *eth_addr);
|
||||
|
||||
extern void read_output(int fd, char *output_out, int len);
|
||||
|
||||
extern int net_read(int fd, void *buf, int len);
|
||||
extern int net_recvfrom(int fd, void *buf, int len);
|
||||
extern int net_write(int fd, void *buf, int len);
|
||||
extern int net_send(int fd, void *buf, int len);
|
||||
extern int net_sendto(int fd, void *buf, int len, void *to, int sock_len);
|
||||
|
||||
extern void open_addr(unsigned char *addr, unsigned char *netmask, void *arg);
|
||||
extern void close_addr(unsigned char *addr, unsigned char *netmask, void *arg);
|
||||
|
||||
extern char *split_if_spec(char *str, ...);
|
||||
|
||||
extern int dev_netmask(void *d, void *m);
|
||||
|
||||
#endif
|
|
@ -143,7 +143,6 @@ extern int os_access(const char *file, int mode);
|
|||
extern int os_set_exec_close(int fd);
|
||||
extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg);
|
||||
extern int os_get_ifname(int fd, char *namebuf);
|
||||
extern int os_set_slip(int fd);
|
||||
extern int os_mode_fd(int fd, int mode);
|
||||
|
||||
extern int os_seek_file(int fd, unsigned long long offset);
|
||||
|
@ -198,6 +197,7 @@ extern int create_mem_file(unsigned long long len);
|
|||
extern void report_enomem(void);
|
||||
|
||||
/* process.c */
|
||||
pid_t os_reap_child(void);
|
||||
extern void os_alarm_process(int pid);
|
||||
extern void os_kill_process(int pid, int reap_child);
|
||||
extern void os_kill_ptraced_process(int pid, int reap_child);
|
||||
|
@ -286,7 +286,7 @@ int unmap(struct mm_id *mm_idp, unsigned long addr, unsigned long len);
|
|||
|
||||
/* skas/process.c */
|
||||
extern int is_skas_winch(int pid, int fd, void *data);
|
||||
extern int start_userspace(unsigned long stub_stack);
|
||||
extern int start_userspace(struct mm_id *mm_id);
|
||||
extern void userspace(struct uml_pt_regs *regs);
|
||||
extern void new_thread(void *stack, jmp_buf *buf, void (*handler)(void));
|
||||
extern void switch_threads(jmp_buf *me, jmp_buf *you);
|
||||
|
|
|
@ -6,12 +6,21 @@
|
|||
#ifndef __MM_ID_H
|
||||
#define __MM_ID_H
|
||||
|
||||
#define STUB_MAX_FDS 4
|
||||
|
||||
struct mm_id {
|
||||
int pid;
|
||||
unsigned long stack;
|
||||
int syscall_data_len;
|
||||
|
||||
/* Only used with SECCOMP mode */
|
||||
int sock;
|
||||
int syscall_fd_num;
|
||||
int syscall_fd_map[STUB_MAX_FDS];
|
||||
};
|
||||
|
||||
void __switch_mm(struct mm_id *mm_idp);
|
||||
|
||||
void notify_mm_kill(int pid);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
#include <sysdep/ptrace.h>
|
||||
|
||||
extern int using_seccomp;
|
||||
extern int userspace_pid[];
|
||||
|
||||
extern void new_thread_handler(void);
|
||||
|
|
|
@ -11,8 +11,15 @@
|
|||
#include <linux/compiler_types.h>
|
||||
#include <as-layout.h>
|
||||
#include <sysdep/tls.h>
|
||||
#include <sysdep/stub-data.h>
|
||||
#include <mm_id.h>
|
||||
|
||||
#define FUTEX_IN_CHILD 0
|
||||
#define FUTEX_IN_KERN 1
|
||||
|
||||
struct stub_init_data {
|
||||
int seccomp;
|
||||
|
||||
unsigned long stub_start;
|
||||
|
||||
int stub_code_fd;
|
||||
|
@ -20,7 +27,8 @@ struct stub_init_data {
|
|||
int stub_data_fd;
|
||||
unsigned long stub_data_offset;
|
||||
|
||||
unsigned long segv_handler;
|
||||
unsigned long signal_handler;
|
||||
unsigned long signal_restorer;
|
||||
};
|
||||
|
||||
#define STUB_NEXT_SYSCALL(s) \
|
||||
|
@ -52,6 +60,16 @@ struct stub_data {
|
|||
/* 128 leaves enough room for additional fields in the struct */
|
||||
struct stub_syscall syscall_data[(UM_KERN_PAGE_SIZE - 128) / sizeof(struct stub_syscall)] __aligned(16);
|
||||
|
||||
/* data shared with signal handler (only used in seccomp mode) */
|
||||
short restart_wait;
|
||||
unsigned int futex;
|
||||
int signal;
|
||||
unsigned short si_offset;
|
||||
unsigned short mctx_offset;
|
||||
|
||||
/* seccomp architecture specific state restore */
|
||||
struct stub_data_arch arch_data;
|
||||
|
||||
/* Stack for our signal handlers and for calling into . */
|
||||
unsigned char sigstack[UM_KERN_PAGE_SIZE] __aligned(UM_KERN_PAGE_SIZE);
|
||||
};
|
||||
|
|
|
@ -25,7 +25,6 @@ obj-$(CONFIG_GPROF) += gprof_syms.o
|
|||
obj-$(CONFIG_OF) += dtb.o
|
||||
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
|
||||
obj-$(CONFIG_STACKTRACE) += stacktrace.o
|
||||
obj-$(CONFIG_GENERIC_PCI_IOMAP) += ioport.o
|
||||
|
||||
USER_OBJS := config.o
|
||||
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2021 Intel Corporation
|
||||
* Author: Johannes Berg <johannes@sipsolutions.net>
|
||||
*/
|
||||
#include <asm/iomap.h>
|
||||
#include <asm-generic/pci_iomap.h>
|
||||
|
||||
void __iomem *__pci_ioport_map(struct pci_dev *dev, unsigned long port,
|
||||
unsigned int nr)
|
||||
{
|
||||
return NULL;
|
||||
}
|
|
@ -690,3 +690,9 @@ void __init init_IRQ(void)
|
|||
/* Initialize EPOLL Loop */
|
||||
os_setup_epoll();
|
||||
}
|
||||
|
||||
void sigchld_handler(int sig, struct siginfo *unused_si,
|
||||
struct uml_pt_regs *regs, void *mc)
|
||||
{
|
||||
do_IRQ(SIGCHLD_IRQ, regs);
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <linux/sched/signal.h>
|
||||
#include <linux/slab.h>
|
||||
|
||||
#include <shared/irq_kern.h>
|
||||
#include <asm/pgalloc.h>
|
||||
#include <asm/sections.h>
|
||||
#include <asm/mmu_context.h>
|
||||
|
@ -19,6 +20,9 @@
|
|||
/* Ensure the stub_data struct covers the allocated area */
|
||||
static_assert(sizeof(struct stub_data) == STUB_DATA_PAGES * UM_KERN_PAGE_SIZE);
|
||||
|
||||
spinlock_t mm_list_lock;
|
||||
struct list_head mm_list;
|
||||
|
||||
int init_new_context(struct task_struct *task, struct mm_struct *mm)
|
||||
{
|
||||
struct mm_id *new_id = &mm->context.id;
|
||||
|
@ -31,15 +35,15 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
|
|||
|
||||
new_id->stack = stack;
|
||||
|
||||
block_signals_trace();
|
||||
new_id->pid = start_userspace(stack);
|
||||
unblock_signals_trace();
|
||||
|
||||
if (new_id->pid < 0) {
|
||||
ret = new_id->pid;
|
||||
goto out_free;
|
||||
scoped_guard(spinlock_irqsave, &mm_list_lock) {
|
||||
/* Insert into list, used for lookups when the child dies */
|
||||
list_add(&mm->context.list, &mm_list);
|
||||
}
|
||||
|
||||
ret = start_userspace(new_id);
|
||||
if (ret < 0)
|
||||
goto out_free;
|
||||
|
||||
/* Ensure the new MM is clean and nothing unwanted is mapped */
|
||||
unmap(new_id, 0, STUB_START);
|
||||
|
||||
|
@ -60,13 +64,82 @@ void destroy_context(struct mm_struct *mm)
|
|||
* zero, resulting in a kill(0), which will result in the
|
||||
* whole UML suddenly dying. Also, cover negative and
|
||||
* 1 cases, since they shouldn't happen either.
|
||||
*
|
||||
* Negative cases happen if the child died unexpectedly.
|
||||
*/
|
||||
if (mmu->id.pid < 2) {
|
||||
if (mmu->id.pid >= 0 && mmu->id.pid < 2) {
|
||||
printk(KERN_ERR "corrupt mm_context - pid = %d\n",
|
||||
mmu->id.pid);
|
||||
return;
|
||||
}
|
||||
os_kill_ptraced_process(mmu->id.pid, 1);
|
||||
|
||||
if (mmu->id.pid > 0) {
|
||||
os_kill_ptraced_process(mmu->id.pid, 1);
|
||||
mmu->id.pid = -1;
|
||||
}
|
||||
|
||||
if (using_seccomp && mmu->id.sock)
|
||||
os_close_file(mmu->id.sock);
|
||||
|
||||
free_pages(mmu->id.stack, ilog2(STUB_DATA_PAGES));
|
||||
|
||||
guard(spinlock_irqsave)(&mm_list_lock);
|
||||
|
||||
list_del(&mm->context.list);
|
||||
}
|
||||
|
||||
static irqreturn_t mm_sigchld_irq(int irq, void* dev)
|
||||
{
|
||||
struct mm_context *mm_context;
|
||||
pid_t pid;
|
||||
|
||||
guard(spinlock)(&mm_list_lock);
|
||||
|
||||
while ((pid = os_reap_child()) > 0) {
|
||||
/*
|
||||
* A child died, check if we have an MM with the PID. This is
|
||||
* only relevant in SECCOMP mode (as ptrace will fail anyway).
|
||||
*
|
||||
* See wait_stub_done_seccomp for more details.
|
||||
*/
|
||||
list_for_each_entry(mm_context, &mm_list, list) {
|
||||
if (mm_context->id.pid == pid) {
|
||||
struct stub_data *stub_data;
|
||||
printk("Unexpectedly lost MM child! Affected tasks will segfault.");
|
||||
|
||||
/* Marks the MM as dead */
|
||||
mm_context->id.pid = -1;
|
||||
|
||||
/*
|
||||
* NOTE: If SMP is implemented, a futex_wake
|
||||
* needs to be added here.
|
||||
*/
|
||||
stub_data = (void *)mm_context->id.stack;
|
||||
stub_data->futex = FUTEX_IN_KERN;
|
||||
|
||||
/*
|
||||
* NOTE: Currently executing syscalls by
|
||||
* affected tasks may finish normally.
|
||||
*/
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return IRQ_HANDLED;
|
||||
}
|
||||
|
||||
static int __init init_child_tracking(void)
|
||||
{
|
||||
int err;
|
||||
|
||||
spin_lock_init(&mm_list_lock);
|
||||
INIT_LIST_HEAD(&mm_list);
|
||||
|
||||
err = request_irq(SIGCHLD_IRQ, mm_sigchld_irq, 0, "SIGCHLD", NULL);
|
||||
if (err < 0)
|
||||
panic("Failed to register SIGCHLD IRQ: %d", err);
|
||||
|
||||
return 0;
|
||||
}
|
||||
early_initcall(init_child_tracking)
|
||||
|
|
|
@ -5,21 +5,54 @@
|
|||
|
||||
#include <sysdep/stub.h>
|
||||
|
||||
static __always_inline int syscall_handler(struct stub_data *d)
|
||||
#include <linux/futex.h>
|
||||
#include <sys/socket.h>
|
||||
#include <errno.h>
|
||||
|
||||
/*
|
||||
* Known security issues
|
||||
*
|
||||
* Userspace can jump to this address to execute *any* syscall that is
|
||||
* permitted by the stub. As we will return afterwards, it can do
|
||||
* whatever it likes, including:
|
||||
* - Tricking the kernel into handing out the memory FD
|
||||
* - Using this memory FD to read/write all physical memory
|
||||
* - Running in parallel to the kernel processing a syscall
|
||||
* (possibly creating data races?)
|
||||
* - Blocking e.g. SIGALRM to avoid time based scheduling
|
||||
*
|
||||
* To avoid this, the permitted location for each syscall needs to be
|
||||
* checked for in the SECCOMP filter (which is reasonably simple). Also,
|
||||
* more care will need to go into considerations how the code might be
|
||||
* tricked by using a prepared stack (or even modifying the stack from
|
||||
* another thread in case SMP support is added).
|
||||
*
|
||||
* As for the SIGALRM, the best counter measure will be to check in the
|
||||
* kernel that the process is reporting back the SIGALRM in a timely
|
||||
* fashion.
|
||||
*/
|
||||
static __always_inline int syscall_handler(int fd_map[STUB_MAX_FDS])
|
||||
{
|
||||
struct stub_data *d = get_stub_data();
|
||||
int i;
|
||||
unsigned long res;
|
||||
int fd;
|
||||
|
||||
for (i = 0; i < d->syscall_data_len; i++) {
|
||||
struct stub_syscall *sc = &d->syscall_data[i];
|
||||
|
||||
switch (sc->syscall) {
|
||||
case STUB_SYSCALL_MMAP:
|
||||
if (fd_map)
|
||||
fd = fd_map[sc->mem.fd];
|
||||
else
|
||||
fd = sc->mem.fd;
|
||||
|
||||
res = stub_syscall6(STUB_MMAP_NR,
|
||||
sc->mem.addr, sc->mem.length,
|
||||
sc->mem.prot,
|
||||
MAP_SHARED | MAP_FIXED,
|
||||
sc->mem.fd, sc->mem.offset);
|
||||
fd, sc->mem.offset);
|
||||
if (res != sc->mem.addr) {
|
||||
d->err = res;
|
||||
d->syscall_data_len = i;
|
||||
|
@ -51,9 +84,98 @@ static __always_inline int syscall_handler(struct stub_data *d)
|
|||
void __section(".__syscall_stub")
|
||||
stub_syscall_handler(void)
|
||||
{
|
||||
struct stub_data *d = get_stub_data();
|
||||
|
||||
syscall_handler(d);
|
||||
syscall_handler(NULL);
|
||||
|
||||
trap_myself();
|
||||
}
|
||||
|
||||
void __section(".__syscall_stub")
|
||||
stub_signal_interrupt(int sig, siginfo_t *info, void *p)
|
||||
{
|
||||
struct stub_data *d = get_stub_data();
|
||||
char rcv_data;
|
||||
union {
|
||||
char data[CMSG_SPACE(sizeof(int) * STUB_MAX_FDS)];
|
||||
struct cmsghdr align;
|
||||
} ctrl = {};
|
||||
struct iovec iov = {
|
||||
.iov_base = &rcv_data,
|
||||
.iov_len = 1,
|
||||
};
|
||||
struct msghdr msghdr = {
|
||||
.msg_iov = &iov,
|
||||
.msg_iovlen = 1,
|
||||
.msg_control = &ctrl,
|
||||
.msg_controllen = sizeof(ctrl),
|
||||
};
|
||||
ucontext_t *uc = p;
|
||||
struct cmsghdr *fd_msg;
|
||||
int *fd_map;
|
||||
int num_fds;
|
||||
long res;
|
||||
|
||||
d->signal = sig;
|
||||
d->si_offset = (unsigned long)info - (unsigned long)&d->sigstack[0];
|
||||
d->mctx_offset = (unsigned long)&uc->uc_mcontext - (unsigned long)&d->sigstack[0];
|
||||
|
||||
restart_wait:
|
||||
d->futex = FUTEX_IN_KERN;
|
||||
do {
|
||||
res = stub_syscall3(__NR_futex, (unsigned long)&d->futex,
|
||||
FUTEX_WAKE, 1);
|
||||
} while (res == -EINTR);
|
||||
|
||||
do {
|
||||
res = stub_syscall4(__NR_futex, (unsigned long)&d->futex,
|
||||
FUTEX_WAIT, FUTEX_IN_KERN, 0);
|
||||
} while (res == -EINTR || d->futex == FUTEX_IN_KERN);
|
||||
|
||||
if (res < 0 && res != -EAGAIN)
|
||||
stub_syscall1(__NR_exit_group, 1);
|
||||
|
||||
if (d->syscall_data_len) {
|
||||
/* Read passed FDs (if any) */
|
||||
do {
|
||||
res = stub_syscall3(__NR_recvmsg, 0, (unsigned long)&msghdr, 0);
|
||||
} while (res == -EINTR);
|
||||
|
||||
/* We should never have a receive error (other than -EAGAIN) */
|
||||
if (res < 0 && res != -EAGAIN)
|
||||
stub_syscall1(__NR_exit_group, 1);
|
||||
|
||||
/* Receive the FDs */
|
||||
num_fds = 0;
|
||||
fd_msg = msghdr.msg_control;
|
||||
fd_map = (void *)&CMSG_DATA(fd_msg);
|
||||
if (res == iov.iov_len && msghdr.msg_controllen > sizeof(struct cmsghdr))
|
||||
num_fds = (fd_msg->cmsg_len - CMSG_LEN(0)) / sizeof(int);
|
||||
|
||||
/* Try running queued syscalls. */
|
||||
res = syscall_handler(fd_map);
|
||||
|
||||
while (num_fds)
|
||||
stub_syscall2(__NR_close, fd_map[--num_fds], 0);
|
||||
} else {
|
||||
res = 0;
|
||||
}
|
||||
|
||||
if (res < 0 || d->restart_wait) {
|
||||
/* Report SIGSYS if we restart. */
|
||||
d->signal = SIGSYS;
|
||||
d->restart_wait = 0;
|
||||
|
||||
goto restart_wait;
|
||||
}
|
||||
|
||||
/* Restore arch dependent state that is not part of the mcontext */
|
||||
stub_seccomp_restore_state(&d->arch_data);
|
||||
|
||||
/* Return so that the host modified mcontext is restored. */
|
||||
}
|
||||
|
||||
void __section(".__syscall_stub")
|
||||
stub_signal_restorer(void)
|
||||
{
|
||||
/* We must not have anything on the stack when doing rt_sigreturn */
|
||||
stub_syscall0(__NR_rt_sigreturn);
|
||||
}
|
||||
|
|
|
@ -1,8 +1,12 @@
|
|||
#include <sys/ptrace.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <sys/fcntl.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <sysdep/stub.h>
|
||||
#include <stub-data.h>
|
||||
#include <linux/filter.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <generated/asm-offsets.h>
|
||||
|
||||
void _start(void);
|
||||
|
||||
|
@ -25,8 +29,6 @@ noinline static void real_init(void)
|
|||
} sa = {
|
||||
/* Need to set SA_RESTORER (but the handler never returns) */
|
||||
.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO | 0x04000000,
|
||||
/* no need to mask any signals */
|
||||
.sa_mask = 0,
|
||||
};
|
||||
|
||||
/* set a nice name */
|
||||
|
@ -35,13 +37,20 @@ noinline static void real_init(void)
|
|||
/* Make sure this process dies if the kernel dies */
|
||||
stub_syscall2(__NR_prctl, PR_SET_PDEATHSIG, SIGKILL);
|
||||
|
||||
/* Needed in SECCOMP mode (and safe to do anyway) */
|
||||
stub_syscall5(__NR_prctl, PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
|
||||
/* read information from STDIN and close it */
|
||||
res = stub_syscall3(__NR_read, 0,
|
||||
(unsigned long)&init_data, sizeof(init_data));
|
||||
if (res != sizeof(init_data))
|
||||
stub_syscall1(__NR_exit, 10);
|
||||
|
||||
stub_syscall1(__NR_close, 0);
|
||||
/* In SECCOMP mode, FD 0 is a socket and is later used for FD passing */
|
||||
if (!init_data.seccomp)
|
||||
stub_syscall1(__NR_close, 0);
|
||||
else
|
||||
stub_syscall3(__NR_fcntl, 0, F_SETFL, O_NONBLOCK);
|
||||
|
||||
/* map stub code + data */
|
||||
res = stub_syscall6(STUB_MMAP_NR,
|
||||
|
@ -59,22 +68,148 @@ noinline static void real_init(void)
|
|||
if (res != init_data.stub_start + UM_KERN_PAGE_SIZE)
|
||||
stub_syscall1(__NR_exit, 12);
|
||||
|
||||
/* In SECCOMP mode, we only need the signalling FD from now on */
|
||||
if (init_data.seccomp) {
|
||||
res = stub_syscall3(__NR_close_range, 1, ~0U, 0);
|
||||
if (res != 0)
|
||||
stub_syscall1(__NR_exit, 13);
|
||||
}
|
||||
|
||||
/* setup signal stack inside stub data */
|
||||
stack.ss_sp = (void *)init_data.stub_start + UM_KERN_PAGE_SIZE;
|
||||
stub_syscall2(__NR_sigaltstack, (unsigned long)&stack, 0);
|
||||
|
||||
/* register SIGSEGV handler */
|
||||
sa.sa_handler_ = (void *) init_data.segv_handler;
|
||||
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV, (unsigned long)&sa, 0,
|
||||
sizeof(sa.sa_mask));
|
||||
if (res != 0)
|
||||
stub_syscall1(__NR_exit, 13);
|
||||
/* register signal handlers */
|
||||
sa.sa_handler_ = (void *) init_data.signal_handler;
|
||||
sa.sa_restorer = (void *) init_data.signal_restorer;
|
||||
if (!init_data.seccomp) {
|
||||
/* In ptrace mode, the SIGSEGV handler never returns */
|
||||
sa.sa_mask = 0;
|
||||
|
||||
stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
|
||||
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
|
||||
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
|
||||
if (res != 0)
|
||||
stub_syscall1(__NR_exit, 14);
|
||||
} else {
|
||||
/* SECCOMP mode uses rt_sigreturn, need to mask all signals */
|
||||
sa.sa_mask = ~0ULL;
|
||||
|
||||
stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
|
||||
res = stub_syscall4(__NR_rt_sigaction, SIGSEGV,
|
||||
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
|
||||
if (res != 0)
|
||||
stub_syscall1(__NR_exit, 15);
|
||||
|
||||
stub_syscall1(__NR_exit, 14);
|
||||
res = stub_syscall4(__NR_rt_sigaction, SIGSYS,
|
||||
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
|
||||
if (res != 0)
|
||||
stub_syscall1(__NR_exit, 16);
|
||||
|
||||
res = stub_syscall4(__NR_rt_sigaction, SIGALRM,
|
||||
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
|
||||
if (res != 0)
|
||||
stub_syscall1(__NR_exit, 17);
|
||||
|
||||
res = stub_syscall4(__NR_rt_sigaction, SIGTRAP,
|
||||
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
|
||||
if (res != 0)
|
||||
stub_syscall1(__NR_exit, 18);
|
||||
|
||||
res = stub_syscall4(__NR_rt_sigaction, SIGILL,
|
||||
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
|
||||
if (res != 0)
|
||||
stub_syscall1(__NR_exit, 19);
|
||||
|
||||
res = stub_syscall4(__NR_rt_sigaction, SIGFPE,
|
||||
(unsigned long)&sa, 0, sizeof(sa.sa_mask));
|
||||
if (res != 0)
|
||||
stub_syscall1(__NR_exit, 20);
|
||||
}
|
||||
|
||||
/*
|
||||
* If in seccomp mode, install the SECCOMP filter and trigger a syscall.
|
||||
* Otherwise set PTRACE_TRACEME and do a SIGSTOP.
|
||||
*/
|
||||
if (init_data.seccomp) {
|
||||
struct sock_filter filter[] = {
|
||||
#if __BITS_PER_LONG > 32
|
||||
/* [0] Load upper 32bit of instruction pointer from seccomp_data */
|
||||
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
|
||||
(offsetof(struct seccomp_data, instruction_pointer) + 4)),
|
||||
|
||||
/* [1] Jump forward 3 instructions if the upper address is not identical */
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) >> 32, 0, 3),
|
||||
#endif
|
||||
/* [2] Load lower 32bit of instruction pointer from seccomp_data */
|
||||
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
|
||||
(offsetof(struct seccomp_data, instruction_pointer))),
|
||||
|
||||
/* [3] Mask out lower bits */
|
||||
BPF_STMT(BPF_ALU | BPF_AND | BPF_K, 0xfffff000),
|
||||
|
||||
/* [4] Jump to [6] if the lower bits are not on the expected page */
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, (init_data.stub_start) & 0xfffff000, 1, 0),
|
||||
|
||||
/* [5] Trap call, allow */
|
||||
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
|
||||
|
||||
/* [6,7] Check architecture */
|
||||
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
|
||||
offsetof(struct seccomp_data, arch)),
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,
|
||||
UM_SECCOMP_ARCH_NATIVE, 1, 0),
|
||||
|
||||
/* [8] Kill (for architecture check) */
|
||||
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
|
||||
|
||||
/* [9] Load syscall number */
|
||||
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
|
||||
offsetof(struct seccomp_data, nr)),
|
||||
|
||||
/* [10-16] Check against permitted syscalls */
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_futex,
|
||||
7, 0),
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_recvmsg,
|
||||
6, 0),
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K,__NR_close,
|
||||
5, 0),
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, STUB_MMAP_NR,
|
||||
4, 0),
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_munmap,
|
||||
3, 0),
|
||||
#ifdef __i386__
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_set_thread_area,
|
||||
2, 0),
|
||||
#else
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_arch_prctl,
|
||||
2, 0),
|
||||
#endif
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_rt_sigreturn,
|
||||
1, 0),
|
||||
|
||||
/* [17] Not one of the permitted syscalls */
|
||||
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_KILL_PROCESS),
|
||||
|
||||
/* [18] Permitted call for the stub */
|
||||
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
|
||||
};
|
||||
struct sock_fprog prog = {
|
||||
.len = sizeof(filter) / sizeof(filter[0]),
|
||||
.filter = filter,
|
||||
};
|
||||
|
||||
if (stub_syscall3(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
|
||||
SECCOMP_FILTER_FLAG_TSYNC,
|
||||
(unsigned long)&prog) != 0)
|
||||
stub_syscall1(__NR_exit, 21);
|
||||
|
||||
/* Fall through, the exit syscall will cause SIGSYS */
|
||||
} else {
|
||||
stub_syscall4(__NR_ptrace, PTRACE_TRACEME, 0, 0, 0);
|
||||
|
||||
stub_syscall2(__NR_kill, stub_syscall0(__NR_getpid), SIGSTOP);
|
||||
}
|
||||
|
||||
stub_syscall1(__NR_exit, 30);
|
||||
|
||||
__builtin_unreachable();
|
||||
}
|
||||
|
|
|
@ -856,11 +856,16 @@ static struct clock_event_device timer_clockevent = {
|
|||
|
||||
static irqreturn_t um_timer(int irq, void *dev)
|
||||
{
|
||||
if (get_current()->mm != NULL)
|
||||
{
|
||||
/* userspace - relay signal, results in correct userspace timers */
|
||||
/*
|
||||
* Interrupt the (possibly) running userspace process, technically this
|
||||
* should only happen if userspace is currently executing.
|
||||
* With infinite CPU time-travel, we can only get here when userspace
|
||||
* is not executing. Do not notify there and avoid spurious scheduling.
|
||||
*/
|
||||
if (time_travel_mode != TT_MODE_INFCPU &&
|
||||
time_travel_mode != TT_MODE_EXTERNAL &&
|
||||
get_current()->mm)
|
||||
os_alarm_process(get_current()->mm->context.id.pid);
|
||||
}
|
||||
|
||||
(*timer_clockevent.event_handler)(&timer_clockevent);
|
||||
|
||||
|
|
|
@ -16,7 +16,122 @@
|
|||
#include <kern_util.h>
|
||||
#include <os.h>
|
||||
#include <skas.h>
|
||||
#include <arch.h>
|
||||
|
||||
/*
|
||||
* NOTE: UML does not have exception tables. As such, this is almost a copy
|
||||
* of the code in mm/memory.c, only adjusting the logic to simply check whether
|
||||
* we are coming from the kernel instead of doing an additional lookup in the
|
||||
* exception table.
|
||||
* We can do this simplification because we never get here if the exception was
|
||||
* fixable.
|
||||
*/
|
||||
static inline bool get_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
|
||||
{
|
||||
if (likely(mmap_read_trylock(mm)))
|
||||
return true;
|
||||
|
||||
if (!is_user)
|
||||
return false;
|
||||
|
||||
return !mmap_read_lock_killable(mm);
|
||||
}
|
||||
|
||||
static inline bool mmap_upgrade_trylock(struct mm_struct *mm)
|
||||
{
|
||||
/*
|
||||
* We don't have this operation yet.
|
||||
*
|
||||
* It should be easy enough to do: it's basically a
|
||||
* atomic_long_try_cmpxchg_acquire()
|
||||
* from RWSEM_READER_BIAS -> RWSEM_WRITER_LOCKED, but
|
||||
* it also needs the proper lockdep magic etc.
|
||||
*/
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool upgrade_mmap_lock_carefully(struct mm_struct *mm, bool is_user)
|
||||
{
|
||||
mmap_read_unlock(mm);
|
||||
if (!is_user)
|
||||
return false;
|
||||
|
||||
return !mmap_write_lock_killable(mm);
|
||||
}
|
||||
|
||||
/*
|
||||
* Helper for page fault handling.
|
||||
*
|
||||
* This is kind of equivalend to "mmap_read_lock()" followed
|
||||
* by "find_extend_vma()", except it's a lot more careful about
|
||||
* the locking (and will drop the lock on failure).
|
||||
*
|
||||
* For example, if we have a kernel bug that causes a page
|
||||
* fault, we don't want to just use mmap_read_lock() to get
|
||||
* the mm lock, because that would deadlock if the bug were
|
||||
* to happen while we're holding the mm lock for writing.
|
||||
*
|
||||
* So this checks the exception tables on kernel faults in
|
||||
* order to only do this all for instructions that are actually
|
||||
* expected to fault.
|
||||
*
|
||||
* We can also actually take the mm lock for writing if we
|
||||
* need to extend the vma, which helps the VM layer a lot.
|
||||
*/
|
||||
static struct vm_area_struct *
|
||||
um_lock_mm_and_find_vma(struct mm_struct *mm,
|
||||
unsigned long addr, bool is_user)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
if (!get_mmap_lock_carefully(mm, is_user))
|
||||
return NULL;
|
||||
|
||||
vma = find_vma(mm, addr);
|
||||
if (likely(vma && (vma->vm_start <= addr)))
|
||||
return vma;
|
||||
|
||||
/*
|
||||
* Well, dang. We might still be successful, but only
|
||||
* if we can extend a vma to do so.
|
||||
*/
|
||||
if (!vma || !(vma->vm_flags & VM_GROWSDOWN)) {
|
||||
mmap_read_unlock(mm);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* We can try to upgrade the mmap lock atomically,
|
||||
* in which case we can continue to use the vma
|
||||
* we already looked up.
|
||||
*
|
||||
* Otherwise we'll have to drop the mmap lock and
|
||||
* re-take it, and also look up the vma again,
|
||||
* re-checking it.
|
||||
*/
|
||||
if (!mmap_upgrade_trylock(mm)) {
|
||||
if (!upgrade_mmap_lock_carefully(mm, is_user))
|
||||
return NULL;
|
||||
|
||||
vma = find_vma(mm, addr);
|
||||
if (!vma)
|
||||
goto fail;
|
||||
if (vma->vm_start <= addr)
|
||||
goto success;
|
||||
if (!(vma->vm_flags & VM_GROWSDOWN))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (expand_stack_locked(vma, addr))
|
||||
goto fail;
|
||||
|
||||
success:
|
||||
mmap_write_downgrade(mm);
|
||||
return vma;
|
||||
|
||||
fail:
|
||||
mmap_write_unlock(mm);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note this is constrained to return 0, -EFAULT, -EACCES, -ENOMEM by
|
||||
|
@@ -44,21 +159,10 @@ int handle_page_fault(unsigned long address, unsigned long ip,
	if (is_user)
		flags |= FAULT_FLAG_USER;
retry:
	mmap_read_lock(mm);
	vma = find_vma(mm, address);
	if (!vma)
		goto out;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto out;
	if (is_user && !ARCH_IS_STACKGROW(address))
		goto out;
	vma = expand_stack(mm, address);
	vma = um_lock_mm_and_find_vma(mm, address, is_user);
	if (!vma)
		goto out_nosemaphore;

good_area:
	*code_out = SEGV_ACCERR;
	if (is_write) {
		if (!(vma->vm_flags & VM_WRITE))
@@ -8,7 +8,7 @@ KCOV_INSTRUMENT := n

obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \
	registers.o sigio.o signal.o start_up.o time.o tty.o \
	umid.o user_syms.o util.o drivers/ skas/
	umid.o user_syms.o util.o skas/

CFLAGS_signal.o += -Wframe-larger-than=4096
@ -1,13 +0,0 @@
|
|||
# SPDX-License-Identifier: GPL-2.0
|
||||
#
|
||||
# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
|
||||
#
|
||||
|
||||
ethertap-objs := ethertap_kern.o ethertap_user.o
|
||||
tuntap-objs := tuntap_kern.o tuntap_user.o
|
||||
|
||||
obj-y =
|
||||
obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o
|
||||
obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o
|
||||
|
||||
include $(srctree)/arch/um/scripts/Makefile.rules
|
|
@ -1,21 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#ifndef __DRIVERS_ETAP_H
|
||||
#define __DRIVERS_ETAP_H
|
||||
|
||||
#include <net_user.h>
|
||||
|
||||
struct ethertap_data {
|
||||
char *dev_name;
|
||||
char *gate_addr;
|
||||
int data_fd;
|
||||
int control_fd;
|
||||
void *dev;
|
||||
};
|
||||
|
||||
extern const struct net_user_info ethertap_user_info;
|
||||
|
||||
#endif
|
|
@ -1,100 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
|
||||
* James Leu (jleu@mindspring.net).
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
* Copyright (C) 2001 by various other people who didn't put their name here.
|
||||
*/
|
||||
|
||||
#include <linux/init.h>
|
||||
#include <linux/netdevice.h>
|
||||
#include "etap.h"
|
||||
#include <net_kern.h>
|
||||
|
||||
struct ethertap_init {
|
||||
char *dev_name;
|
||||
char *gate_addr;
|
||||
};
|
||||
|
||||
static void etap_init(struct net_device *dev, void *data)
|
||||
{
|
||||
struct uml_net_private *pri;
|
||||
struct ethertap_data *epri;
|
||||
struct ethertap_init *init = data;
|
||||
|
||||
pri = netdev_priv(dev);
|
||||
epri = (struct ethertap_data *) pri->user;
|
||||
epri->dev_name = init->dev_name;
|
||||
epri->gate_addr = init->gate_addr;
|
||||
epri->data_fd = -1;
|
||||
epri->control_fd = -1;
|
||||
epri->dev = dev;
|
||||
|
||||
printk(KERN_INFO "ethertap backend - %s", epri->dev_name);
|
||||
if (epri->gate_addr != NULL)
|
||||
printk(KERN_CONT ", IP = %s", epri->gate_addr);
|
||||
printk(KERN_CONT "\n");
|
||||
}
|
||||
|
||||
static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
int len;
|
||||
|
||||
len = net_recvfrom(fd, skb_mac_header(skb),
|
||||
skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP);
|
||||
if (len <= 0)
|
||||
return(len);
|
||||
|
||||
skb_pull(skb, 2);
|
||||
len -= 2;
|
||||
return len;
|
||||
}
|
||||
|
||||
static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
skb_push(skb, 2);
|
||||
return net_send(fd, skb->data, skb->len);
|
||||
}
|
||||
|
||||
const struct net_kern_info ethertap_kern_info = {
|
||||
.init = etap_init,
|
||||
.protocol = eth_protocol,
|
||||
.read = etap_read,
|
||||
.write = etap_write,
|
||||
};
|
||||
|
||||
static int ethertap_setup(char *str, char **mac_out, void *data)
|
||||
{
|
||||
struct ethertap_init *init = data;
|
||||
|
||||
*init = ((struct ethertap_init)
|
||||
{ .dev_name = NULL,
|
||||
.gate_addr = NULL });
|
||||
if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out,
|
||||
&init->gate_addr))
|
||||
return 0;
|
||||
if (init->dev_name == NULL) {
|
||||
printk(KERN_ERR "ethertap_setup : Missing tap device name\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct transport ethertap_transport = {
|
||||
.list = LIST_HEAD_INIT(ethertap_transport.list),
|
||||
.name = "ethertap",
|
||||
.setup = ethertap_setup,
|
||||
	.user = &ethertap_user_info,
|
||||
	.kern = &ethertap_kern_info,
|
||||
.private_size = sizeof(struct ethertap_data),
|
||||
.setup_size = sizeof(struct ethertap_init),
|
||||
};
|
||||
|
||||
static int register_ethertap(void)
|
||||
{
|
||||
	register_transport(&ethertap_transport);
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(register_ethertap);
|
|
@ -1,248 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
* Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
|
||||
* James Leu (jleu@mindspring.net).
|
||||
* Copyright (C) 2001 by various other people who didn't put their name here.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/wait.h>
|
||||
#include "etap.h"
|
||||
#include <os.h>
|
||||
#include <net_user.h>
|
||||
#include <um_malloc.h>
|
||||
|
||||
#define MAX_PACKET ETH_MAX_PACKET
|
||||
|
||||
static int etap_user_init(void *data, void *dev)
|
||||
{
|
||||
struct ethertap_data *pri = data;
|
||||
|
||||
pri->dev = dev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct addr_change {
|
||||
enum { ADD_ADDR, DEL_ADDR } what;
|
||||
unsigned char addr[4];
|
||||
unsigned char netmask[4];
|
||||
};
|
||||
|
||||
static void etap_change(int op, unsigned char *addr, unsigned char *netmask,
|
||||
int fd)
|
||||
{
|
||||
struct addr_change change;
|
||||
char *output;
|
||||
int n;
|
||||
|
||||
change.what = op;
|
||||
memcpy(change.addr, addr, sizeof(change.addr));
|
||||
memcpy(change.netmask, netmask, sizeof(change.netmask));
|
||||
CATCH_EINTR(n = write(fd, &change, sizeof(change)));
|
||||
if (n != sizeof(change)) {
|
||||
printk(UM_KERN_ERR "etap_change - request failed, err = %d\n",
|
||||
errno);
|
||||
return;
|
||||
}
|
||||
|
||||
output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL);
|
||||
if (output == NULL)
|
||||
printk(UM_KERN_ERR "etap_change : Failed to allocate output "
|
||||
"buffer\n");
|
||||
read_output(fd, output, UM_KERN_PAGE_SIZE);
|
||||
if (output != NULL) {
|
||||
printk("%s", output);
|
||||
kfree(output);
|
||||
}
|
||||
}
|
||||
|
||||
static void etap_open_addr(unsigned char *addr, unsigned char *netmask,
|
||||
void *arg)
|
||||
{
|
||||
etap_change(ADD_ADDR, addr, netmask, *((int *) arg));
|
||||
}
|
||||
|
||||
static void etap_close_addr(unsigned char *addr, unsigned char *netmask,
|
||||
void *arg)
|
||||
{
|
||||
etap_change(DEL_ADDR, addr, netmask, *((int *) arg));
|
||||
}
|
||||
|
||||
struct etap_pre_exec_data {
|
||||
int control_remote;
|
||||
int control_me;
|
||||
int data_me;
|
||||
};
|
||||
|
||||
static void etap_pre_exec(void *arg)
|
||||
{
|
||||
struct etap_pre_exec_data *data = arg;
|
||||
|
||||
dup2(data->control_remote, 1);
|
||||
close(data->data_me);
|
||||
close(data->control_me);
|
||||
}
|
||||
|
||||
static int etap_tramp(char *dev, char *gate, int control_me,
|
||||
int control_remote, int data_me, int data_remote)
|
||||
{
|
||||
struct etap_pre_exec_data pe_data;
|
||||
int pid, err, n;
|
||||
char version_buf[sizeof("nnnnn\0")];
|
||||
char data_fd_buf[sizeof("nnnnnn\0")];
|
||||
char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
|
||||
char *setup_args[] = { "uml_net", version_buf, "ethertap", dev,
|
||||
data_fd_buf, gate_buf, NULL };
|
||||
char *nosetup_args[] = { "uml_net", version_buf, "ethertap",
|
||||
dev, data_fd_buf, NULL };
|
||||
char **args, c;
|
||||
|
||||
sprintf(data_fd_buf, "%d", data_remote);
|
||||
sprintf(version_buf, "%d", UML_NET_VERSION);
|
||||
if (gate != NULL) {
|
||||
strscpy(gate_buf, gate);
|
||||
args = setup_args;
|
||||
}
|
||||
else args = nosetup_args;
|
||||
|
||||
err = 0;
|
||||
pe_data.control_remote = control_remote;
|
||||
pe_data.control_me = control_me;
|
||||
pe_data.data_me = data_me;
|
||||
pid = run_helper(etap_pre_exec, &pe_data, args);
|
||||
|
||||
if (pid < 0)
|
||||
err = pid;
|
||||
close(data_remote);
|
||||
close(control_remote);
|
||||
CATCH_EINTR(n = read(control_me, &c, sizeof(c)));
|
||||
if (n != sizeof(c)) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "etap_tramp : read of status failed, "
|
||||
"err = %d\n", -err);
|
||||
return err;
|
||||
}
|
||||
if (c != 1) {
|
||||
printk(UM_KERN_ERR "etap_tramp : uml_net failed\n");
|
||||
err = helper_wait(pid);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
static int etap_open(void *data)
|
||||
{
|
||||
struct ethertap_data *pri = data;
|
||||
char *output;
|
||||
int data_fds[2], control_fds[2], err, output_len;
|
||||
|
||||
err = tap_open_common(pri->dev, pri->gate_addr);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds);
|
||||
if (err) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "etap_open - data socketpair failed - "
|
||||
"err = %d\n", errno);
|
||||
return err;
|
||||
}
|
||||
|
||||
err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds);
|
||||
if (err) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "etap_open - control socketpair failed - "
|
||||
"err = %d\n", errno);
|
||||
goto out_close_data;
|
||||
}
|
||||
|
||||
err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0],
|
||||
control_fds[1], data_fds[0], data_fds[1]);
|
||||
output_len = UM_KERN_PAGE_SIZE;
|
||||
output = uml_kmalloc(output_len, UM_GFP_KERNEL);
|
||||
read_output(control_fds[0], output, output_len);
|
||||
|
||||
if (output == NULL)
|
||||
printk(UM_KERN_ERR "etap_open : failed to allocate output "
|
||||
"buffer\n");
|
||||
else {
|
||||
printk("%s", output);
|
||||
kfree(output);
|
||||
}
|
||||
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err);
|
||||
goto out_close_control;
|
||||
}
|
||||
|
||||
pri->data_fd = data_fds[0];
|
||||
pri->control_fd = control_fds[0];
|
||||
iter_addresses(pri->dev, etap_open_addr, &pri->control_fd);
|
||||
return data_fds[0];
|
||||
|
||||
out_close_control:
|
||||
close(control_fds[0]);
|
||||
close(control_fds[1]);
|
||||
out_close_data:
|
||||
close(data_fds[0]);
|
||||
close(data_fds[1]);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void etap_close(int fd, void *data)
|
||||
{
|
||||
struct ethertap_data *pri = data;
|
||||
|
||||
iter_addresses(pri->dev, etap_close_addr, &pri->control_fd);
|
||||
close(fd);
|
||||
|
||||
if (shutdown(pri->data_fd, SHUT_RDWR) < 0)
|
||||
printk(UM_KERN_ERR "etap_close - shutdown data socket failed, "
|
||||
"errno = %d\n", errno);
|
||||
|
||||
if (shutdown(pri->control_fd, SHUT_RDWR) < 0)
|
||||
printk(UM_KERN_ERR "etap_close - shutdown control socket "
|
||||
"failed, errno = %d\n", errno);
|
||||
|
||||
close(pri->data_fd);
|
||||
pri->data_fd = -1;
|
||||
close(pri->control_fd);
|
||||
pri->control_fd = -1;
|
||||
}
|
||||
|
||||
static void etap_add_addr(unsigned char *addr, unsigned char *netmask,
|
||||
void *data)
|
||||
{
|
||||
struct ethertap_data *pri = data;
|
||||
|
||||
tap_check_ips(pri->gate_addr, addr);
|
||||
if (pri->control_fd == -1)
|
||||
return;
|
||||
etap_open_addr(addr, netmask, &pri->control_fd);
|
||||
}
|
||||
|
||||
static void etap_del_addr(unsigned char *addr, unsigned char *netmask,
|
||||
void *data)
|
||||
{
|
||||
struct ethertap_data *pri = data;
|
||||
|
||||
if (pri->control_fd == -1)
|
||||
return;
|
||||
|
||||
etap_close_addr(addr, netmask, &pri->control_fd);
|
||||
}
|
||||
|
||||
const struct net_user_info ethertap_user_info = {
|
||||
.init = etap_user_init,
|
||||
.open = etap_open,
|
||||
.close = etap_close,
|
||||
.remove = NULL,
|
||||
.add_address = etap_add_addr,
|
||||
.delete_address = etap_del_addr,
|
||||
.mtu = ETH_MAX_PACKET,
|
||||
.max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP,
|
||||
};
|
|
@ -1,21 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#ifndef __UM_TUNTAP_H
|
||||
#define __UM_TUNTAP_H
|
||||
|
||||
#include <net_user.h>
|
||||
|
||||
struct tuntap_data {
|
||||
char *dev_name;
|
||||
int fixed_config;
|
||||
char *gate_addr;
|
||||
int fd;
|
||||
void *dev;
|
||||
};
|
||||
|
||||
extern const struct net_user_info tuntap_user_info;
|
||||
|
||||
#endif
|
|
@ -1,86 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#include <linux/netdevice.h>
|
||||
#include <linux/init.h>
|
||||
#include <linux/skbuff.h>
|
||||
#include <asm/errno.h>
|
||||
#include <net_kern.h>
|
||||
#include "tuntap.h"
|
||||
|
||||
struct tuntap_init {
|
||||
char *dev_name;
|
||||
char *gate_addr;
|
||||
};
|
||||
|
||||
static void tuntap_init(struct net_device *dev, void *data)
|
||||
{
|
||||
struct uml_net_private *pri;
|
||||
struct tuntap_data *tpri;
|
||||
struct tuntap_init *init = data;
|
||||
|
||||
pri = netdev_priv(dev);
|
||||
tpri = (struct tuntap_data *) pri->user;
|
||||
tpri->dev_name = init->dev_name;
|
||||
tpri->fixed_config = (init->dev_name != NULL);
|
||||
tpri->gate_addr = init->gate_addr;
|
||||
tpri->fd = -1;
|
||||
tpri->dev = dev;
|
||||
|
||||
printk(KERN_INFO "TUN/TAP backend - ");
|
||||
if (tpri->gate_addr != NULL)
|
||||
printk(KERN_CONT "IP = %s", tpri->gate_addr);
|
||||
printk(KERN_CONT "\n");
|
||||
}
|
||||
|
||||
static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
return net_read(fd, skb_mac_header(skb),
|
||||
skb->dev->mtu + ETH_HEADER_OTHER);
|
||||
}
|
||||
|
||||
static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
|
||||
{
|
||||
return net_write(fd, skb->data, skb->len);
|
||||
}
|
||||
|
||||
const struct net_kern_info tuntap_kern_info = {
|
||||
.init = tuntap_init,
|
||||
.protocol = eth_protocol,
|
||||
.read = tuntap_read,
|
||||
.write = tuntap_write,
|
||||
};
|
||||
|
||||
static int tuntap_setup(char *str, char **mac_out, void *data)
|
||||
{
|
||||
struct tuntap_init *init = data;
|
||||
|
||||
*init = ((struct tuntap_init)
|
||||
{ .dev_name = NULL,
|
||||
.gate_addr = NULL });
|
||||
if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out,
|
||||
&init->gate_addr))
|
||||
return 0;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static struct transport tuntap_transport = {
|
||||
.list = LIST_HEAD_INIT(tuntap_transport.list),
|
||||
.name = "tuntap",
|
||||
.setup = tuntap_setup,
|
||||
.user = &tuntap_user_info,
|
||||
.kern = &tuntap_kern_info,
|
||||
.private_size = sizeof(struct tuntap_data),
|
||||
.setup_size = sizeof(struct tuntap_init),
|
||||
};
|
||||
|
||||
static int register_tuntap(void)
|
||||
{
|
||||
register_transport(&tuntap_transport);
|
||||
return 0;
|
||||
}
|
||||
|
||||
late_initcall(register_tuntap);
|
|
@ -1,215 +0,0 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <linux/if_tun.h>
|
||||
#include <net/if.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/uio.h>
|
||||
#include <kern_util.h>
|
||||
#include <os.h>
|
||||
#include "tuntap.h"
|
||||
|
||||
static int tuntap_user_init(void *data, void *dev)
|
||||
{
|
||||
struct tuntap_data *pri = data;
|
||||
|
||||
pri->dev = dev;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask,
|
||||
void *data)
|
||||
{
|
||||
struct tuntap_data *pri = data;
|
||||
|
||||
tap_check_ips(pri->gate_addr, addr);
|
||||
if ((pri->fd == -1) || pri->fixed_config)
|
||||
return;
|
||||
open_addr(addr, netmask, pri->dev_name);
|
||||
}
|
||||
|
||||
static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask,
|
||||
void *data)
|
||||
{
|
||||
struct tuntap_data *pri = data;
|
||||
|
||||
if ((pri->fd == -1) || pri->fixed_config)
|
||||
return;
|
||||
close_addr(addr, netmask, pri->dev_name);
|
||||
}
|
||||
|
||||
struct tuntap_pre_exec_data {
|
||||
int stdout_fd;
|
||||
int close_me;
|
||||
};
|
||||
|
||||
static void tuntap_pre_exec(void *arg)
|
||||
{
|
||||
struct tuntap_pre_exec_data *data = arg;
|
||||
|
||||
dup2(data->stdout_fd, 1);
|
||||
close(data->close_me);
|
||||
}
|
||||
|
||||
static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote,
|
||||
char *buffer, int buffer_len, int *used_out)
|
||||
{
|
||||
struct tuntap_pre_exec_data data;
|
||||
char version_buf[sizeof("nnnnn\0")];
|
||||
char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate,
|
||||
NULL };
|
||||
char buf[CMSG_SPACE(sizeof(*fd_out))];
|
||||
struct msghdr msg;
|
||||
struct cmsghdr *cmsg;
|
||||
struct iovec iov;
|
||||
int pid, n, err;
|
||||
|
||||
sprintf(version_buf, "%d", UML_NET_VERSION);
|
||||
|
||||
data.stdout_fd = remote;
|
||||
data.close_me = me;
|
||||
|
||||
pid = run_helper(tuntap_pre_exec, &data, argv);
|
||||
|
||||
if (pid < 0)
|
||||
return pid;
|
||||
|
||||
close(remote);
|
||||
|
||||
msg.msg_name = NULL;
|
||||
msg.msg_namelen = 0;
|
||||
if (buffer != NULL) {
|
||||
iov = ((struct iovec) { buffer, buffer_len });
|
||||
msg.msg_iov = &iov;
|
||||
msg.msg_iovlen = 1;
|
||||
}
|
||||
else {
|
||||
msg.msg_iov = NULL;
|
||||
msg.msg_iovlen = 0;
|
||||
}
|
||||
msg.msg_control = buf;
|
||||
msg.msg_controllen = sizeof(buf);
|
||||
msg.msg_flags = 0;
|
||||
n = recvmsg(me, &msg, 0);
|
||||
*used_out = n;
|
||||
if (n < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - "
|
||||
"errno = %d\n", errno);
|
||||
return err;
|
||||
}
|
||||
helper_wait(pid);
|
||||
|
||||
cmsg = CMSG_FIRSTHDR(&msg);
|
||||
if (cmsg == NULL) {
|
||||
printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
|
||||
"message\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
if ((cmsg->cmsg_level != SOL_SOCKET) ||
|
||||
(cmsg->cmsg_type != SCM_RIGHTS)) {
|
||||
printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
|
||||
"descriptor\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
*fd_out = ((int *) CMSG_DATA(cmsg))[0];
|
||||
os_set_exec_close(*fd_out);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int tuntap_open(void *data)
|
||||
{
|
||||
struct ifreq ifr;
|
||||
struct tuntap_data *pri = data;
|
||||
char *output, *buffer;
|
||||
int err, fds[2], len, used;
|
||||
|
||||
err = tap_open_common(pri->dev, pri->gate_addr);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
||||
if (pri->fixed_config) {
|
||||
pri->fd = os_open_file("/dev/net/tun",
|
||||
of_cloexec(of_rdwr(OPENFLAGS())), 0);
|
||||
if (pri->fd < 0) {
|
||||
printk(UM_KERN_ERR "Failed to open /dev/net/tun, "
|
||||
"err = %d\n", -pri->fd);
|
||||
return pri->fd;
|
||||
}
|
||||
memset(&ifr, 0, sizeof(ifr));
|
||||
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
|
||||
strscpy(ifr.ifr_name, pri->dev_name);
|
||||
if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n",
|
||||
errno);
|
||||
close(pri->fd);
|
||||
return err;
|
||||
}
|
||||
}
|
||||
else {
|
||||
err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds);
|
||||
if (err) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "tuntap_open : socketpair failed - "
|
||||
"errno = %d\n", errno);
|
||||
return err;
|
||||
}
|
||||
|
||||
buffer = get_output_buffer(&len);
|
||||
if (buffer != NULL)
|
||||
len--;
|
||||
used = 0;
|
||||
|
||||
err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0],
|
||||
fds[1], buffer, len, &used);
|
||||
|
||||
output = buffer;
|
||||
if (err < 0) {
|
||||
printk("%s", output);
|
||||
free_output_buffer(buffer);
|
||||
printk(UM_KERN_ERR "tuntap_open_tramp failed - "
|
||||
"err = %d\n", -err);
|
||||
return err;
|
||||
}
|
||||
|
||||
pri->dev_name = uml_strdup(buffer);
|
||||
output += IFNAMSIZ;
|
||||
printk("%s", output);
|
||||
free_output_buffer(buffer);
|
||||
|
||||
close(fds[0]);
|
||||
iter_addresses(pri->dev, open_addr, pri->dev_name);
|
||||
}
|
||||
|
||||
return pri->fd;
|
||||
}
|
||||
|
||||
static void tuntap_close(int fd, void *data)
|
||||
{
|
||||
struct tuntap_data *pri = data;
|
||||
|
||||
if (!pri->fixed_config)
|
||||
iter_addresses(pri->dev, close_addr, pri->dev_name);
|
||||
close(fd);
|
||||
pri->fd = -1;
|
||||
}
|
||||
|
||||
const struct net_user_info tuntap_user_info = {
|
||||
.init = tuntap_user_init,
|
||||
.open = tuntap_open,
|
||||
.close = tuntap_close,
|
||||
.remove = NULL,
|
||||
.add_address = tuntap_add_addr,
|
||||
.delete_address = tuntap_del_addr,
|
||||
.mtu = ETH_MAX_PACKET,
|
||||
.max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
|
||||
};
|
|
@@ -106,21 +106,6 @@ int os_get_ifname(int fd, char* namebuf)
	return 0;
}

int os_set_slip(int fd)
{
	int disc, sencap;

	disc = N_SLIP;
	if (ioctl(fd, TIOCSETD, &disc) < 0)
		return -errno;

	sencap = 0;
	if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0)
		return -errno;

	return 0;
}

int os_mode_fd(int fd, int mode)
{
	int err;
@@ -2,6 +2,9 @@
#ifndef __UM_OS_LINUX_INTERNAL_H
#define __UM_OS_LINUX_INTERNAL_H

#include <mm_id.h>
#include <stub-data.h>

/*
 * elf_aux.c
 */
@@ -16,5 +19,5 @@ void check_tmpexec(void);
 * skas/process.c
 */
void wait_stub_done(int pid);

void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys);
#endif /* __UM_OS_LINUX_INTERNAL_H */
|
|
@ -18,17 +18,29 @@
|
|||
#include <init.h>
|
||||
#include <longjmp.h>
|
||||
#include <os.h>
|
||||
#include <skas/skas.h>
|
||||
|
||||
void os_alarm_process(int pid)
|
||||
{
|
||||
if (pid <= 0)
|
||||
return;
|
||||
|
||||
kill(pid, SIGALRM);
|
||||
}
|
||||
|
||||
void os_kill_process(int pid, int reap_child)
|
||||
{
|
||||
if (pid <= 0)
|
||||
return;
|
||||
|
||||
/* Block signals until child is reaped */
|
||||
block_signals();
|
||||
|
||||
kill(pid, SIGKILL);
|
||||
if (reap_child)
|
||||
CATCH_EINTR(waitpid(pid, NULL, __WALL));
|
||||
|
||||
unblock_signals();
|
||||
}
|
||||
|
||||
/* Kill off a ptraced child by all means available. kill it normally first,
|
||||
|
@ -38,11 +50,27 @@ void os_kill_process(int pid, int reap_child)
|
|||
|
||||
void os_kill_ptraced_process(int pid, int reap_child)
|
||||
{
|
||||
if (pid <= 0)
|
||||
return;
|
||||
|
||||
/* Block signals until child is reaped */
|
||||
block_signals();
|
||||
|
||||
kill(pid, SIGKILL);
|
||||
ptrace(PTRACE_KILL, pid);
|
||||
ptrace(PTRACE_CONT, pid);
|
||||
if (reap_child)
|
||||
CATCH_EINTR(waitpid(pid, NULL, __WALL));
|
||||
|
||||
unblock_signals();
|
||||
}
|
||||
|
||||
pid_t os_reap_child(void)
|
||||
{
|
||||
int status;
|
||||
|
||||
/* Try to reap a child */
|
||||
return waitpid(-1, &status, WNOHANG);
|
||||
}
|
||||
|
||||
/* Don't use the glibc version, which caches the result in TLS. It misses some
|
||||
|
@ -151,6 +179,9 @@ void init_new_thread_signals(void)
|
|||
set_handler(SIGBUS);
|
||||
signal(SIGHUP, SIG_IGN);
|
||||
set_handler(SIGIO);
|
||||
/* We (currently) only use the child reaper IRQ in seccomp mode */
|
||||
if (using_seccomp)
|
||||
set_handler(SIGCHLD);
|
||||
signal(SIGWINCH, SIG_IGN);
|
||||
}
|
||||
|
||||
|
|
|
@@ -14,8 +14,8 @@

/* This is set once at boot time and not changed thereafter */

static unsigned long exec_regs[MAX_REG_NR];
static unsigned long *exec_fp_regs;
unsigned long exec_regs[MAX_REG_NR];
unsigned long *exec_fp_regs;

int init_pid_registers(int pid)
{
|
@@ -12,6 +12,7 @@
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
#include <asm/unistd.h>
#include <kern_util.h>
#include <init.h>
#include <os.h>
@@ -46,7 +47,7 @@ static void *write_sigio_thread(void *unused)
		       __func__, errno);
	}

	CATCH_EINTR(r = tgkill(pid, pid, SIGIO));
	CATCH_EINTR(r = syscall(__NR_tgkill, pid, pid, SIGIO));
	if (r < 0)
		printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n",
		       __func__, errno);
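The hunk above swaps the direct tgkill() call for a raw syscall(__NR_tgkill, ...),
presumably because older host C libraries ship no tgkill() wrapper even though the
kernel call itself is available. A minimal, self-contained sketch of that fallback
pattern (the helper name uml_tgkill is made up for illustration, it is not part of
the patch):

#include <signal.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

/* Raw-syscall fallback: works even where libc lacks a tgkill() wrapper. */
static int uml_tgkill(pid_t tgid, pid_t tid, int sig)
{
	return syscall(__NR_tgkill, tgid, tid, sig);
}

/* Example: signal the current (single-threaded) process, as the SIGIO thread does. */
int main(void)
{
	signal(SIGIO, SIG_IGN);
	return uml_tgkill(getpid(), getpid(), SIGIO) ? 1 : 0;
}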
|
@@ -29,6 +29,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) =
	[SIGBUS] = relay_signal,
	[SIGSEGV] = segv_handler,
	[SIGIO] = sigio_handler,
	[SIGCHLD] = sigchld_handler,
};

static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
@@ -44,7 +45,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
	}

	/* enable signals if sig isn't IRQ signal */
	if ((sig != SIGIO) && (sig != SIGWINCH))
	if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGCHLD))
		unblock_signals_trace();

	(*sig_info[sig])(sig, si, &r, mc);
@@ -64,6 +65,9 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
#define SIGALRM_BIT 1
#define SIGALRM_MASK (1 << SIGALRM_BIT)

#define SIGCHLD_BIT 2
#define SIGCHLD_MASK (1 << SIGCHLD_BIT)

int signals_enabled;
#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
static int signals_blocked, signals_blocked_pending;
@@ -102,6 +106,11 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
		return;
	}

	if (!enabled && (sig == SIGCHLD)) {
		signals_pending |= SIGCHLD_MASK;
		return;
	}

	block_signals_trace();

	sig_handler_common(sig, si, mc);
@@ -181,6 +190,8 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {

	[SIGIO] = sig_handler,
	[SIGWINCH] = sig_handler,
	/* SIGCHLD is only actually registered in seccomp mode. */
	[SIGCHLD] = sig_handler,
	[SIGALRM] = timer_alarm_handler,

	[SIGUSR1] = sigusr1_handler,
@@ -309,6 +320,12 @@ void unblock_signals(void)
	if (save_pending & SIGIO_MASK)
		sig_handler_common(SIGIO, NULL, NULL);

	if (save_pending & SIGCHLD_MASK) {
		struct uml_pt_regs regs = {};

		sigchld_handler(SIGCHLD, NULL, &regs, NULL);
	}

	/* Do not reenter the handler */

	if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK)))
|
@ -43,6 +43,16 @@ void syscall_stub_dump_error(struct mm_id *mm_idp)
|
|||
|
||||
print_hex_dump(UM_KERN_ERR, " syscall data: ", 0,
|
||||
16, 4, sc, sizeof(*sc), 0);
|
||||
|
||||
if (using_seccomp) {
|
||||
printk(UM_KERN_ERR "%s: FD map num: %d", __func__,
|
||||
mm_idp->syscall_fd_num);
|
||||
print_hex_dump(UM_KERN_ERR,
|
||||
" FD map: ", 0, 16,
|
||||
sizeof(mm_idp->syscall_fd_map[0]),
|
||||
mm_idp->syscall_fd_map,
|
||||
sizeof(mm_idp->syscall_fd_map), 0);
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
|
||||
|
@ -80,27 +90,32 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
|
|||
int n, i;
|
||||
int err, pid = mm_idp->pid;
|
||||
|
||||
n = ptrace_setregs(pid, syscall_regs);
|
||||
if (n < 0) {
|
||||
printk(UM_KERN_ERR "Registers - \n");
|
||||
for (i = 0; i < MAX_REG_NR; i++)
|
||||
printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
|
||||
panic("%s : PTRACE_SETREGS failed, errno = %d\n",
|
||||
__func__, -n);
|
||||
}
|
||||
|
||||
/* Inform process how much we have filled in. */
|
||||
proc_data->syscall_data_len = mm_idp->syscall_data_len;
|
||||
|
||||
err = ptrace(PTRACE_CONT, pid, 0, 0);
|
||||
if (err)
|
||||
panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
|
||||
errno);
|
||||
if (using_seccomp) {
|
||||
proc_data->restart_wait = 1;
|
||||
wait_stub_done_seccomp(mm_idp, 0, 1);
|
||||
} else {
|
||||
n = ptrace_setregs(pid, syscall_regs);
|
||||
if (n < 0) {
|
||||
printk(UM_KERN_ERR "Registers -\n");
|
||||
for (i = 0; i < MAX_REG_NR; i++)
|
||||
printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
|
||||
panic("%s : PTRACE_SETREGS failed, errno = %d\n",
|
||||
__func__, -n);
|
||||
}
|
||||
|
||||
wait_stub_done(pid);
|
||||
err = ptrace(PTRACE_CONT, pid, 0, 0);
|
||||
if (err)
|
||||
panic("Failed to continue stub, pid = %d, errno = %d\n",
|
||||
pid, errno);
|
||||
|
||||
wait_stub_done(pid);
|
||||
}
|
||||
|
||||
/*
|
||||
* proc_data->err will be non-zero if there was an (unexpected) error.
|
||||
* proc_data->err will be negative if there was an (unexpected) error.
|
||||
* In that case, syscall_data_len points to the last executed syscall,
|
||||
* otherwise it will be zero (but we do not need to rely on that).
|
||||
*/
|
||||
|
@ -113,6 +128,9 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
|
|||
mm_idp->syscall_data_len = 0;
|
||||
}
|
||||
|
||||
if (using_seccomp)
|
||||
mm_idp->syscall_fd_num = 0;
|
||||
|
||||
return mm_idp->syscall_data_len;
|
||||
}
|
||||
|
||||
|
@ -175,6 +193,44 @@ static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp,
|
|||
return NULL;
|
||||
}
|
||||
|
||||
static int get_stub_fd(struct mm_id *mm_idp, int fd)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Find an FD slot (or flush and use first) */
|
||||
if (!using_seccomp)
|
||||
return fd;
|
||||
|
||||
/* Already crashed, value does not matter */
|
||||
if (mm_idp->syscall_data_len < 0)
|
||||
return 0;
|
||||
|
||||
/* Find existing FD in map if we can allocate another syscall */
|
||||
if (mm_idp->syscall_data_len <
|
||||
ARRAY_SIZE(((struct stub_data *)NULL)->syscall_data)) {
|
||||
for (i = 0; i < mm_idp->syscall_fd_num; i++) {
|
||||
if (mm_idp->syscall_fd_map[i] == fd)
|
||||
return i;
|
||||
}
|
||||
|
||||
if (mm_idp->syscall_fd_num < STUB_MAX_FDS) {
|
||||
i = mm_idp->syscall_fd_num;
|
||||
mm_idp->syscall_fd_map[i] = fd;
|
||||
|
||||
mm_idp->syscall_fd_num++;
|
||||
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
/* FD map full or no syscall space available, continue after flush */
|
||||
do_syscall_stub(mm_idp);
|
||||
mm_idp->syscall_fd_map[0] = fd;
|
||||
mm_idp->syscall_fd_num = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
|
||||
int phys_fd, unsigned long long offset)
|
||||
{
|
||||
|
@ -182,12 +238,21 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
|
|||
|
||||
/* Compress with previous syscall if that is possible */
|
||||
sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt);
|
||||
if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd &&
|
||||
if (sc && sc->mem.prot == prot &&
|
||||
sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) {
|
||||
sc->mem.length += len;
|
||||
return 0;
|
||||
int prev_fd = sc->mem.fd;
|
||||
|
||||
if (using_seccomp)
|
||||
prev_fd = mm_idp->syscall_fd_map[sc->mem.fd];
|
||||
|
||||
if (phys_fd == prev_fd) {
|
||||
sc->mem.length += len;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
phys_fd = get_stub_fd(mm_idp, phys_fd);
|
||||
|
||||
sc = syscall_stub_alloc(mm_idp);
|
||||
sc->syscall = STUB_SYSCALL_MMAP;
|
||||
sc->mem.addr = virt;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
|
||||
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
|
||||
* Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
@ -15,6 +16,7 @@
|
|||
#include <sys/mman.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/socket.h>
|
||||
#include <asm/unistd.h>
|
||||
#include <as-layout.h>
|
||||
#include <init.h>
|
||||
|
@ -25,8 +27,11 @@
|
|||
#include <registers.h>
|
||||
#include <skas.h>
|
||||
#include <sysdep/stub.h>
|
||||
#include <sysdep/mcontext.h>
|
||||
#include <linux/futex.h>
|
||||
#include <linux/threads.h>
|
||||
#include <timetravel.h>
|
||||
#include <asm-generic/rwonce.h>
|
||||
#include "../internal.h"
|
||||
|
||||
int is_skas_winch(int pid, int fd, void *data)
|
||||
|
@ -142,6 +147,105 @@ bad_wait:
|
|||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys)
|
||||
{
|
||||
struct stub_data *data = (void *)mm_idp->stack;
|
||||
int ret;
|
||||
|
||||
do {
|
||||
const char byte = 0;
|
||||
struct iovec iov = {
|
||||
.iov_base = (void *)&byte,
|
||||
.iov_len = sizeof(byte),
|
||||
};
|
||||
union {
|
||||
char data[CMSG_SPACE(sizeof(mm_idp->syscall_fd_map))];
|
||||
struct cmsghdr align;
|
||||
} ctrl;
|
||||
struct msghdr msgh = {
|
||||
.msg_iov = &iov,
|
||||
.msg_iovlen = 1,
|
||||
};
|
||||
|
||||
if (!running) {
|
||||
if (mm_idp->syscall_fd_num) {
|
||||
unsigned int fds_size =
|
||||
sizeof(int) * mm_idp->syscall_fd_num;
|
||||
struct cmsghdr *cmsg;
|
||||
|
||||
msgh.msg_control = ctrl.data;
|
||||
msgh.msg_controllen = CMSG_SPACE(fds_size);
|
||||
cmsg = CMSG_FIRSTHDR(&msgh);
|
||||
cmsg->cmsg_level = SOL_SOCKET;
|
||||
cmsg->cmsg_type = SCM_RIGHTS;
|
||||
cmsg->cmsg_len = CMSG_LEN(fds_size);
|
||||
memcpy(CMSG_DATA(cmsg), mm_idp->syscall_fd_map,
|
||||
fds_size);
|
||||
|
||||
CATCH_EINTR(syscall(__NR_sendmsg, mm_idp->sock,
|
||||
&msgh, 0));
|
||||
}
|
||||
|
||||
data->signal = 0;
|
||||
data->futex = FUTEX_IN_CHILD;
|
||||
CATCH_EINTR(syscall(__NR_futex, &data->futex,
|
||||
FUTEX_WAKE, 1, NULL, NULL, 0));
|
||||
}
|
||||
|
||||
do {
|
||||
/*
|
||||
* We need to check whether the child is still alive
|
||||
* before and after the FUTEX_WAIT call. Before, in
|
||||
* case it just died but we still updated data->futex
|
||||
* to FUTEX_IN_CHILD. And after, in case it died while
|
||||
* we were waiting (and SIGCHLD woke us up, see the
|
||||
* IRQ handler in mmu.c).
|
||||
*
|
||||
* Either way, if PID is negative, then we have no
|
||||
* choice but to kill the task.
|
||||
*/
|
||||
if (__READ_ONCE(mm_idp->pid) < 0)
|
||||
goto out_kill;
|
||||
|
||||
ret = syscall(__NR_futex, &data->futex,
|
||||
FUTEX_WAIT, FUTEX_IN_CHILD,
|
||||
NULL, NULL, 0);
|
||||
if (ret < 0 && errno != EINTR && errno != EAGAIN) {
|
||||
printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
goto out_kill;
|
||||
}
|
||||
} while (data->futex == FUTEX_IN_CHILD);
|
||||
|
||||
if (__READ_ONCE(mm_idp->pid) < 0)
|
||||
goto out_kill;
|
||||
|
||||
running = 0;
|
||||
|
||||
/* We may receive a SIGALRM before SIGSYS, iterate again. */
|
||||
} while (wait_sigsys && data->signal == SIGALRM);
|
||||
|
||||
if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) {
|
||||
printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__);
|
||||
goto out_kill;
|
||||
}
|
||||
|
||||
if (wait_sigsys && data->signal != SIGSYS) {
|
||||
printk(UM_KERN_ERR "%s : expected SIGSYS but got %d",
|
||||
__func__, data->signal);
|
||||
goto out_kill;
|
||||
}
|
||||
|
||||
return;
|
||||
|
||||
out_kill:
|
||||
printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n",
|
||||
__func__, mm_idp->pid, errno);
|
||||
/* This is not true inside start_userspace */
|
||||
if (current_mm_id() == mm_idp)
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
extern unsigned long current_stub_stack(void);
|
||||
|
||||
static void get_skas_faultinfo(int pid, struct faultinfo *fi)
|
||||
|
@ -163,12 +267,6 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi)
|
|||
memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
|
||||
}
|
||||
|
||||
static void handle_segv(int pid, struct uml_pt_regs *regs)
|
||||
{
|
||||
	get_skas_faultinfo(pid, &regs->faultinfo);
|
||||
segv(regs->faultinfo, 0, 1, NULL, NULL);
|
||||
}
|
||||
|
||||
static void handle_trap(int pid, struct uml_pt_regs *regs)
|
||||
{
|
||||
if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
|
||||
|
@ -181,29 +279,48 @@ extern char __syscall_stub_start[];
|
|||
|
||||
static int stub_exe_fd;
|
||||
|
||||
struct tramp_data {
|
||||
struct stub_data *stub_data;
|
||||
/* 0 is inherited, 1 is the kernel side */
|
||||
int sockpair[2];
|
||||
};
|
||||
|
||||
#ifndef CLOSE_RANGE_CLOEXEC
|
||||
#define CLOSE_RANGE_CLOEXEC (1U << 2)
|
||||
#endif
|
||||
|
||||
static int userspace_tramp(void *stack)
|
||||
static int userspace_tramp(void *data)
|
||||
{
|
||||
struct tramp_data *tramp_data = data;
|
||||
char *const argv[] = { "uml-userspace", NULL };
|
||||
int pipe_fds[2];
|
||||
unsigned long long offset;
|
||||
struct stub_init_data init_data = {
|
||||
.seccomp = using_seccomp,
|
||||
.stub_start = STUB_START,
|
||||
.segv_handler = STUB_CODE +
|
||||
(unsigned long) stub_segv_handler -
|
||||
(unsigned long) __syscall_stub_start,
|
||||
};
|
||||
struct iomem_region *iomem;
|
||||
int ret;
|
||||
|
||||
if (using_seccomp) {
|
||||
init_data.signal_handler = STUB_CODE +
|
||||
(unsigned long) stub_signal_interrupt -
|
||||
(unsigned long) __syscall_stub_start;
|
||||
init_data.signal_restorer = STUB_CODE +
|
||||
(unsigned long) stub_signal_restorer -
|
||||
(unsigned long) __syscall_stub_start;
|
||||
} else {
|
||||
init_data.signal_handler = STUB_CODE +
|
||||
(unsigned long) stub_segv_handler -
|
||||
(unsigned long) __syscall_stub_start;
|
||||
init_data.signal_restorer = 0;
|
||||
}
|
||||
|
||||
init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start),
|
||||
&offset);
|
||||
init_data.stub_code_offset = MMAP_OFFSET(offset);
|
||||
|
||||
init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset);
|
||||
init_data.stub_data_fd = phys_mapping(uml_to_phys(tramp_data->stub_data),
|
||||
&offset);
|
||||
init_data.stub_data_offset = MMAP_OFFSET(offset);
|
||||
|
||||
/*
|
||||
|
@ -214,20 +331,21 @@ static int userspace_tramp(void *stack)
|
|||
syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC);
|
||||
|
||||
fcntl(init_data.stub_data_fd, F_SETFD, 0);
|
||||
for (iomem = iomem_regions; iomem; iomem = iomem->next)
|
||||
fcntl(iomem->fd, F_SETFD, 0);
|
||||
|
||||
/* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */
|
||||
if (pipe(pipe_fds))
|
||||
exit(2);
|
||||
/* In SECCOMP mode, these FDs are passed when needed */
|
||||
if (!using_seccomp) {
|
||||
for (iomem = iomem_regions; iomem; iomem = iomem->next)
|
||||
fcntl(iomem->fd, F_SETFD, 0);
|
||||
}
|
||||
|
||||
if (dup2(pipe_fds[0], 0) < 0)
|
||||
/* dup2 signaling FD/socket to STDIN */
|
||||
if (dup2(tramp_data->sockpair[0], 0) < 0)
|
||||
exit(3);
|
||||
close(pipe_fds[0]);
|
||||
close(tramp_data->sockpair[0]);
|
||||
|
||||
/* Write init_data and close write side */
|
||||
ret = write(pipe_fds[1], &init_data, sizeof(init_data));
|
||||
close(pipe_fds[1]);
|
||||
ret = write(tramp_data->sockpair[1], &init_data, sizeof(init_data));
|
||||
close(tramp_data->sockpair[1]);
|
||||
|
||||
if (ret != sizeof(init_data))
|
||||
exit(4);
|
||||
|
@ -315,11 +433,12 @@ static int __init init_stub_exe_fd(void)
|
|||
}
|
||||
__initcall(init_stub_exe_fd);
|
||||
|
||||
int using_seccomp;
|
||||
int userspace_pid[NR_CPUS];
|
||||
|
||||
/**
|
||||
* start_userspace() - prepare a new userspace process
|
||||
* @stub_stack: pointer to the stub stack.
|
||||
* @mm_id: The corresponding struct mm_id
|
||||
*
|
||||
* Setups a new temporary stack page that is used while userspace_tramp() runs
|
||||
* Clones the kernel process into a new userspace process, with FDs only.
|
||||
|
@ -328,11 +447,15 @@ int userspace_pid[NR_CPUS];
|
|||
* when negative: an error number.
|
||||
* FIXME: can PIDs become negative?!
|
||||
*/
|
||||
int start_userspace(unsigned long stub_stack)
|
||||
int start_userspace(struct mm_id *mm_id)
|
||||
{
|
||||
struct stub_data *proc_data = (void *)mm_id->stack;
|
||||
struct tramp_data tramp_data = {
|
||||
.stub_data = proc_data,
|
||||
};
|
||||
void *stack;
|
||||
unsigned long sp;
|
||||
int pid, status, n, err;
|
||||
int status, n, err;
|
||||
|
||||
/* setup a temporary stack page */
|
||||
stack = mmap(NULL, UM_KERN_PAGE_SIZE,
|
||||
|
@ -348,40 +471,55 @@ int start_userspace(unsigned long stub_stack)
|
|||
/* set stack pointer to the end of the stack page, so it can grow downwards */
|
||||
sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
|
||||
|
||||
/* clone into new userspace process */
|
||||
pid = clone(userspace_tramp, (void *) sp,
|
||||
CLONE_VFORK | CLONE_VM | SIGCHLD,
|
||||
(void *)stub_stack);
|
||||
if (pid < 0) {
|
||||
/* socket pair for init data and SECCOMP FD passing (no CLOEXEC here) */
|
||||
if (socketpair(AF_UNIX, SOCK_STREAM, 0, tramp_data.sockpair)) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
|
||||
printk(UM_KERN_ERR "%s : socketpair failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
return err;
|
||||
}
|
||||
|
||||
do {
|
||||
CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
|
||||
if (n < 0) {
|
||||
if (using_seccomp)
|
||||
proc_data->futex = FUTEX_IN_CHILD;
|
||||
|
||||
mm_id->pid = clone(userspace_tramp, (void *) sp,
|
||||
CLONE_VFORK | CLONE_VM | SIGCHLD,
|
||||
(void *)&tramp_data);
|
||||
if (mm_id->pid < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
goto out_close;
|
||||
}
|
||||
|
||||
if (using_seccomp) {
|
||||
wait_stub_done_seccomp(mm_id, 1, 1);
|
||||
} else {
|
||||
do {
|
||||
CATCH_EINTR(n = waitpid(mm_id->pid, &status,
|
||||
WUNTRACED | __WALL));
|
||||
if (n < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
goto out_kill;
|
||||
}
|
||||
} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
|
||||
|
||||
if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
|
||||
err = -EINVAL;
|
||||
printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
|
||||
__func__, status);
|
||||
goto out_kill;
|
||||
}
|
||||
|
||||
if (ptrace(PTRACE_SETOPTIONS, mm_id->pid, NULL,
|
||||
(void *) PTRACE_O_TRACESYSGOOD) < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
|
||||
printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
goto out_kill;
|
||||
}
|
||||
} while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
|
||||
|
||||
if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
|
||||
err = -EINVAL;
|
||||
printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
|
||||
__func__, status);
|
||||
goto out_kill;
|
||||
}
|
||||
|
||||
if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
|
||||
(void *) PTRACE_O_TRACESYSGOOD) < 0) {
|
||||
err = -errno;
|
||||
printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
goto out_kill;
|
||||
}
|
||||
|
||||
if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
|
||||
|
@ -391,10 +529,22 @@ int start_userspace(unsigned long stub_stack)
|
|||
goto out_kill;
|
||||
}
|
||||
|
||||
return pid;
|
||||
close(tramp_data.sockpair[0]);
|
||||
if (using_seccomp)
|
||||
mm_id->sock = tramp_data.sockpair[1];
|
||||
else
|
||||
close(tramp_data.sockpair[1]);
|
||||
|
||||
return 0;
|
||||
|
||||
out_kill:
|
||||
os_kill_ptraced_process(mm_id->pid, 1);
|
||||
out_close:
|
||||
close(tramp_data.sockpair[0]);
|
||||
close(tramp_data.sockpair[1]);
|
||||
|
||||
mm_id->pid = -1;
|
||||
|
||||
out_kill:
|
||||
os_kill_ptraced_process(pid, 1);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -404,7 +554,9 @@ extern unsigned long tt_extra_sched_jiffies;
|
|||
void userspace(struct uml_pt_regs *regs)
|
||||
{
|
||||
int err, status, op, pid = userspace_pid[0];
|
||||
siginfo_t si;
|
||||
siginfo_t si_ptrace;
|
||||
siginfo_t *si;
|
||||
int sig;
|
||||
|
||||
/* Handle any immediate reschedules or signals */
|
||||
interrupt_end();
|
||||
|
@ -437,103 +589,177 @@ void userspace(struct uml_pt_regs *regs)
|
|||
|
||||
current_mm_sync();
|
||||
|
||||
/* Flush out any pending syscalls */
|
||||
err = syscall_stub_flush(current_mm_id());
|
||||
if (err) {
|
||||
if (err == -ENOMEM)
|
||||
report_enomem();
|
||||
if (using_seccomp) {
|
||||
struct mm_id *mm_id = current_mm_id();
|
||||
struct stub_data *proc_data = (void *) mm_id->stack;
|
||||
int ret;
|
||||
|
||||
printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
|
||||
__func__, -err);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
ret = set_stub_state(regs, proc_data, singlestepping());
|
||||
if (ret) {
|
||||
printk(UM_KERN_ERR "%s - failed to set regs: %d",
|
||||
__func__, ret);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
/*
|
||||
* This can legitimately fail if the process loads a
|
||||
* bogus value into a segment register. It will
|
||||
* segfault and PTRACE_GETREGS will read that value
|
||||
* out of the process. However, PTRACE_SETREGS will
|
||||
* fail. In this case, there is nothing to do but
|
||||
* just kill the process.
|
||||
*/
|
||||
if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
|
||||
printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
/* Must have been reset by the syscall caller */
|
||||
if (proc_data->restart_wait != 0)
|
||||
panic("Programming error: Flag to only run syscalls in child was not cleared!");
|
||||
|
||||
if (put_fp_registers(pid, regs->fp)) {
|
||||
printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
/* Mark pending syscalls for flushing */
|
||||
proc_data->syscall_data_len = mm_id->syscall_data_len;
|
||||
|
||||
if (singlestepping())
|
||||
op = PTRACE_SYSEMU_SINGLESTEP;
|
||||
else
|
||||
op = PTRACE_SYSEMU;
|
||||
wait_stub_done_seccomp(mm_id, 0, 0);
|
||||
|
||||
if (ptrace(op, pid, 0, 0)) {
|
||||
printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
|
||||
__func__, op, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
sig = proc_data->signal;
|
||||
|
||||
CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
if (sig == SIGTRAP && proc_data->err != 0) {
|
||||
printk(UM_KERN_ERR "%s - Error flushing stub syscalls",
|
||||
__func__);
|
||||
syscall_stub_dump_error(mm_id);
|
||||
mm_id->syscall_data_len = proc_data->err;
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
regs->is_user = 1;
|
||||
if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
|
||||
printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
mm_id->syscall_data_len = 0;
|
||||
mm_id->syscall_fd_num = 0;
|
||||
|
||||
if (get_fp_registers(pid, regs->fp)) {
|
||||
printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
fatal_sigsegv();
|
||||
ret = get_stub_state(regs, proc_data, NULL);
|
||||
if (ret) {
|
||||
printk(UM_KERN_ERR "%s - failed to get regs: %d",
|
||||
__func__, ret);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si))
|
||||
panic("%s - Invalid siginfo offset from child",
|
||||
__func__);
|
||||
si = (void *)&proc_data->sigstack[proc_data->si_offset];
|
||||
|
||||
regs->is_user = 1;
|
||||
|
||||
/* Fill in ORIG_RAX and extract fault information */
|
||||
PT_SYSCALL_NR(regs->gp) = si->si_syscall;
|
||||
if (sig == SIGSEGV) {
|
||||
mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset];
|
||||
|
||||
GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext);
|
||||
}
|
||||
} else {
|
||||
/* Flush out any pending syscalls */
|
||||
err = syscall_stub_flush(current_mm_id());
|
||||
if (err) {
|
||||
if (err == -ENOMEM)
|
||||
report_enomem();
|
||||
|
||||
printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
|
||||
__func__, -err);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
/*
|
||||
* This can legitimately fail if the process loads a
|
||||
* bogus value into a segment register. It will
|
||||
* segfault and PTRACE_GETREGS will read that value
|
||||
* out of the process. However, PTRACE_SETREGS will
|
||||
* fail. In this case, there is nothing to do but
|
||||
* just kill the process.
|
||||
*/
|
||||
if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
|
||||
printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
if (put_fp_registers(pid, regs->fp)) {
|
||||
printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
if (singlestepping())
|
||||
op = PTRACE_SYSEMU_SINGLESTEP;
|
||||
else
|
||||
op = PTRACE_SYSEMU;
|
||||
|
||||
if (ptrace(op, pid, 0, 0)) {
|
||||
printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
|
||||
__func__, op, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
|
||||
if (err < 0) {
|
||||
printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
regs->is_user = 1;
|
||||
if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
|
||||
printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
if (get_fp_registers(pid, regs->fp)) {
|
||||
printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n",
|
||||
__func__, errno);
|
||||
fatal_sigsegv();
|
||||
}
|
||||
|
||||
if (WIFSTOPPED(status)) {
|
||||
sig = WSTOPSIG(status);
|
||||
|
||||
/*
|
||||
* These signal handlers need the si argument
|
||||
* and SIGSEGV needs the faultinfo.
|
||||
* The SIGIO and SIGALARM handlers which constitute
|
||||
* the majority of invocations, do not use it.
|
||||
*/
|
||||
switch (sig) {
|
||||
case SIGSEGV:
|
||||
get_skas_faultinfo(pid,
|
||||
						   &regs->faultinfo);
|
||||
fallthrough;
|
||||
case SIGTRAP:
|
||||
case SIGILL:
|
||||
case SIGBUS:
|
||||
case SIGFPE:
|
||||
case SIGWINCH:
|
||||
ptrace(PTRACE_GETSIGINFO, pid, 0,
|
||||
(struct siginfo *)&si_ptrace);
|
||||
si = &si_ptrace;
|
||||
break;
|
||||
default:
|
||||
si = NULL;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
sig = 0;
|
||||
}
|
||||
}
|
||||
|
||||
UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
|
||||
|
||||
if (WIFSTOPPED(status)) {
|
||||
int sig = WSTOPSIG(status);
|
||||
|
||||
/* These signal handlers need the si argument.
|
||||
* The SIGIO and SIGALARM handlers which constitute the
|
||||
* majority of invocations, do not use it.
|
||||
*/
|
||||
if (sig) {
|
||||
switch (sig) {
|
||||
case SIGSEGV:
|
||||
case SIGTRAP:
|
||||
case SIGILL:
|
||||
case SIGBUS:
|
||||
case SIGFPE:
|
||||
case SIGWINCH:
|
||||
ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (sig) {
|
||||
case SIGSEGV:
|
||||
if (PTRACE_FULL_FAULTINFO) {
|
||||
get_skas_faultinfo(pid,
|
||||
						   &regs->faultinfo);
|
||||
(*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
|
||||
if (using_seccomp || PTRACE_FULL_FAULTINFO)
|
||||
(*sig_info[SIGSEGV])(SIGSEGV,
|
||||
(struct siginfo *)si,
|
||||
regs, NULL);
|
||||
}
|
||||
else handle_segv(pid, regs);
|
||||
else
|
||||
segv(regs->faultinfo, 0, 1, NULL, NULL);
|
||||
|
||||
break;
|
||||
case SIGSYS:
|
||||
handle_syscall(regs);
|
||||
break;
|
||||
case SIGTRAP + 0x80:
|
||||
handle_trap(pid, regs);
|
||||
break;
|
||||
case SIGTRAP:
|
||||
relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL);
|
||||
relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL);
|
||||
break;
|
||||
case SIGALRM:
|
||||
break;
|
||||
|
@ -543,7 +769,7 @@ void userspace(struct uml_pt_regs *regs)
|
|||
case SIGFPE:
|
||||
case SIGWINCH:
|
||||
block_signals_trace();
|
||||
(*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL);
|
||||
(*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL);
|
||||
unblock_signals_trace();
|
||||
break;
|
||||
default:
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
|
||||
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
|
||||
*/
|
||||
|
||||
|
@ -24,6 +25,13 @@
|
|||
#include <kern_util.h>
|
||||
#include <mem_user.h>
|
||||
#include <ptrace_user.h>
|
||||
#include <stdbool.h>
|
||||
#include <stub-data.h>
|
||||
#include <sys/prctl.h>
|
||||
#include <linux/seccomp.h>
|
||||
#include <linux/filter.h>
|
||||
#include <sysdep/mcontext.h>
|
||||
#include <sysdep/stub.h>
|
||||
#include <registers.h>
|
||||
#include <skas.h>
|
||||
#include "internal.h"
|
||||
|
@ -224,6 +232,140 @@ static void __init check_ptrace(void)
|
|||
check_sysemu();
|
||||
}
|
||||
|
||||
extern unsigned long host_fp_size;
|
||||
extern unsigned long exec_regs[MAX_REG_NR];
|
||||
extern unsigned long *exec_fp_regs;
|
||||
|
||||
__initdata static struct stub_data *seccomp_test_stub_data;
|
||||
|
||||
static void __init sigsys_handler(int sig, siginfo_t *info, void *p)
|
||||
{
|
||||
ucontext_t *uc = p;
|
||||
|
||||
/* Stow away the location of the mcontext in the stack */
|
||||
seccomp_test_stub_data->mctx_offset = (unsigned long)&uc->uc_mcontext -
|
||||
(unsigned long)&seccomp_test_stub_data->sigstack[0];
|
||||
|
||||
/* Prevent libc from clearing memory (mctx_offset in particular) */
|
||||
syscall(__NR_exit, 0);
|
||||
}
|
||||
|
||||
static int __init seccomp_helper(void *data)
|
||||
{
|
||||
static struct sock_filter filter[] = {
|
||||
BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
|
||||
offsetof(struct seccomp_data, nr)),
|
||||
BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_nanosleep, 1, 0),
|
||||
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
|
||||
BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
|
||||
};
|
||||
static struct sock_fprog prog = {
|
||||
.len = ARRAY_SIZE(filter),
|
||||
.filter = filter,
|
||||
};
|
||||
struct sigaction sa;
|
||||
|
||||
/* close_range is needed for the stub */
|
||||
if (stub_syscall3(__NR_close_range, 1, ~0U, 0))
|
||||
exit(1);
|
||||
|
||||
set_sigstack(seccomp_test_stub_data->sigstack,
|
||||
sizeof(seccomp_test_stub_data->sigstack));
|
||||
|
||||
sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
|
||||
sa.sa_sigaction = (void *) sigsys_handler;
|
||||
sa.sa_restorer = NULL;
|
||||
if (sigaction(SIGSYS, &sa, NULL) < 0)
|
||||
exit(2);
|
||||
|
||||
prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||
if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
|
||||
SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0)
|
||||
exit(3);
|
||||
|
||||
sleep(0);
|
||||
|
||||
/* Never reached. */
|
||||
_exit(4);
|
||||
}
|
||||
|
||||
static bool __init init_seccomp(void)
|
||||
{
|
||||
int pid;
|
||||
int status;
|
||||
int n;
|
||||
unsigned long sp;
|
||||
|
||||
/*
|
||||
* We check that we can install a seccomp filter and then exit(0)
|
||||
* from a trapped syscall.
|
||||
*
|
||||
* Note that we cannot verify that no seccomp filter already exists
|
||||
* for a syscall that results in the process/thread to be killed.
|
||||
*/
|
||||
|
||||

	os_info("Checking that seccomp filters can be installed...");

	seccomp_test_stub_data = mmap(0, sizeof(*seccomp_test_stub_data),
				      PROT_READ | PROT_WRITE,
				      MAP_SHARED | MAP_ANON, 0, 0);

	/* Use the syscall data area as stack, we just need something */
	sp = (unsigned long)&seccomp_test_stub_data->syscall_data +
	     sizeof(seccomp_test_stub_data->syscall_data) -
	     sizeof(void *);
	pid = clone(seccomp_helper, (void *)sp, CLONE_VFORK | CLONE_VM, NULL);

	if (pid < 0)
		fatal_perror("check_seccomp : clone failed");

	CATCH_EINTR(n = waitpid(pid, &status, __WCLONE));
	if (n < 0)
		fatal_perror("check_seccomp : waitpid failed");

	if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
		struct uml_pt_regs *regs;
		unsigned long fp_size;
		int r;

		/* Fill in the host_fp_size from the mcontext. */
		regs = calloc(1, sizeof(struct uml_pt_regs));
		get_stub_state(regs, seccomp_test_stub_data, &fp_size);
		host_fp_size = fp_size;
		free(regs);

		/* Repeat with the correct size */
		regs = calloc(1, sizeof(struct uml_pt_regs) + host_fp_size);
		r = get_stub_state(regs, seccomp_test_stub_data, NULL);

		/* Store as the default startup registers */
		exec_fp_regs = malloc(host_fp_size);
		memcpy(exec_regs, regs->gp, sizeof(exec_regs));
		memcpy(exec_fp_regs, regs->fp, host_fp_size);

		munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));

		free(regs);

		if (r) {
			os_info("failed to fetch registers: %d\n", r);
			return false;
		}

		os_info("OK\n");
		return true;
	}

	if (WIFEXITED(status) && WEXITSTATUS(status) == 2)
		os_info("missing\n");
	else
		os_info("error\n");

	munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));
	return false;
}


static void __init check_coredump_limit(void)
{
	struct rlimit lim;
@@ -278,6 +420,44 @@ void __init get_host_cpu_features(
	}
}

static int seccomp_config __initdata;

static int __init uml_seccomp_config(char *line, int *add)
{
	*add = 0;

	if (strcmp(line, "off") == 0)
		seccomp_config = 0;
	else if (strcmp(line, "auto") == 0)
		seccomp_config = 1;
	else if (strcmp(line, "on") == 0)
		seccomp_config = 2;
	else
		fatal("Invalid seccomp option '%s', expected on/auto/off\n",
		      line);

	return 0;
}

__uml_setup("seccomp=", uml_seccomp_config,
	"seccomp=<on/auto/off>\n"
	" Configure whether or not SECCOMP is used. With SECCOMP, userspace\n"
	" processes work collaboratively with the kernel instead of being\n"
	" traced using ptrace. All syscalls from the application are caught and\n"
	" redirected using a signal. This signal handler in turn is permitted to\n"
	" do the selected set of syscalls to communicate with the UML kernel and\n"
	" do the required memory management.\n"
	"\n"
	" This method is overall faster than the ptrace based userspace, primarily\n"
	" because it reduces the number of context switches for (minor) page faults.\n"
	"\n"
	" However, the SECCOMP filter is not (yet) restrictive enough to prevent\n"
	" userspace from reading and writing all physical memory. Userspace\n"
	" processes could also trick the stub into disabling SIGALRM which\n"
	" prevents it from being interrupted for scheduling purposes.\n"
	"\n"
	" This is insecure and should only be used with a trusted userspace\n\n"
);
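The help text above is the clearest description of the new mode in this series: a BPF filter makes the traced process's syscalls raise SIGSYS, and the signal handler then services them on the UML kernel's behalf. As a hedged, standalone illustration of that trap-and-signal pattern only — this program is not part of the commit, does not use the UML stub infrastructure, and the handler name and the choice of __NR_getpid as the trapped syscall are arbitrary:

/* Minimal sketch of the SECCOMP_RET_TRAP / SIGSYS mechanism (illustrative only). */
#include <stddef.h>
#include <signal.h>
#include <unistd.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

static void sigsys_demo_handler(int sig, siginfo_t *info, void *ctx)
{
	/* For SIGSYS, info->si_syscall carries the trapped syscall number. */
	static const char msg[] = "getpid() was trapped and handled via SIGSYS\n";

	write(STDERR_FILENO, msg, sizeof(msg) - 1);
	_exit(0);
}

int main(void)
{
	struct sock_filter filter[] = {
		/* Load the syscall number from seccomp_data ... */
		BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
			 offsetof(struct seccomp_data, nr)),
		/* ... trap getpid, allow everything else. */
		BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_getpid, 0, 1),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
		BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = sizeof(filter) / sizeof(filter[0]),
		.filter = filter,
	};
	struct sigaction sa = { 0 };

	sa.sa_flags = SA_SIGINFO;
	sa.sa_sigaction = sigsys_demo_handler;
	if (sigaction(SIGSYS, &sa, NULL) < 0)
		return 1;

	/* Needed so an unprivileged process may install a filter. */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0))
		return 2;
	if (prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog))
		return 3;

	/* The filter intercepts this; the handler runs instead. */
	syscall(__NR_getpid);
	return 4;	/* not reached */
}

If the filter cannot be installed (for example on an old host kernel), the prctl() call fails and the program exits non-zero; that is essentially the failure that seccomp=auto silently falls back from, while seccomp=on turns it into a fatal error in os_early_checks() below.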
void __init os_early_checks(void)
{
@@ -286,13 +466,24 @@ void __init os_early_checks(void)
	/* Print out the core dump limits early */
	check_coredump_limit();

	check_ptrace();

	/* Need to check this early because mmapping happens before the
	 * kernel is running.
	 */
	check_tmpexec();

	if (seccomp_config) {
		if (init_seccomp()) {
			using_seccomp = 1;
			return;
		}

		if (seccomp_config == 2)
			fatal("SECCOMP userspace requested but not functional!\n");
	}

	using_seccomp = 0;
	check_ptrace();

	pid = start_ptraced_child();
	if (init_pid_registers(pid))
		fatal("Failed to initialize default registers");
@@ -20,6 +20,9 @@
 */
extern __wsum csum_partial(const void *buff, int len, __wsum sum);

/* Do not call this directly. Declared for export type visibility. */
extern __visible __wsum csum_partial_copy_generic(const void *src, void *dst, int len);

/**
 * csum_fold - Fold and invert a 32bit checksum.
 * sum: 32bit unfolded sum

@@ -21,10 +21,10 @@

#include <asm/user.h>

/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
static __always_inline void rep_nop(void)
/* PAUSE is a good thing to insert into busy-wait loops. */
static __always_inline void native_pause(void)
{
	__asm__ __volatile__("rep;nop": : :"memory");
	__asm__ __volatile__("pause": : :"memory");
}

static __always_inline void cpu_relax(void)
@@ -33,7 +33,7 @@ static __always_inline void cpu_relax(void)
	    time_travel_mode == TT_MODE_EXTERNAL)
		time_travel_ndelay(1);
	else
		rep_nop();
		native_pause();
}

#define task_pt_regs(t) (&(t)->thread.regs)
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
#include <sys/ucontext.h>
#define __FRAME_OFFSETS
#include <linux/errno.h>
#include <linux/string.h>
#include <sys/ucontext.h>
#include <asm/ptrace.h>
#include <asm/sigcontext.h>
#include <sysdep/ptrace.h>
#include <sysdep/mcontext.h>
#include <arch.h>
@@ -18,6 +21,10 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
	COPY2(UESP, ESP); /* sic */
	COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
	COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
#undef COPY2
#undef COPY
#undef COPY_SEG
#undef COPY_SEG_CPL3
#else
#define COPY2(X,Y) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##Y]
#define COPY(X) regs->gp[X/sizeof(unsigned long)] = mc->gregs[REG_##X]
@@ -29,6 +36,8 @@ void get_regs_from_mc(struct uml_pt_regs *regs, mcontext_t *mc)
	COPY2(EFLAGS, EFL);
	COPY2(CS, CSGSFS);
	regs->gp[SS / sizeof(unsigned long)] = mc->gregs[REG_CSGSFS] >> 48;
#undef COPY2
#undef COPY
#endif
}
@@ -42,3 +51,210 @@ void mc_set_rip(void *_mc, void *target)
	mc->gregs[REG_RIP] = (unsigned long)target;
#endif
}
/* Same thing, but the copy macros are turned around. */
void get_mc_from_regs(struct uml_pt_regs *regs, mcontext_t *mc, int single_stepping)
{
#ifdef __i386__
#define COPY2(X,Y) mc->gregs[REG_##Y] = regs->gp[X]
#define COPY(X) mc->gregs[REG_##X] = regs->gp[X]
#define COPY_SEG(X) mc->gregs[REG_##X] = regs->gp[X] & 0xffff;
#define COPY_SEG_CPL3(X) mc->gregs[REG_##X] = (regs->gp[X] & 0xffff) | 3;
	COPY_SEG(GS); COPY_SEG(FS); COPY_SEG(ES); COPY_SEG(DS);
	COPY(EDI); COPY(ESI); COPY(EBP);
	COPY2(UESP, ESP); /* sic */
	COPY(EBX); COPY(EDX); COPY(ECX); COPY(EAX);
	COPY(EIP); COPY_SEG_CPL3(CS); COPY(EFL); COPY_SEG_CPL3(SS);
#else
#define COPY2(X,Y) mc->gregs[REG_##Y] = regs->gp[X/sizeof(unsigned long)]
#define COPY(X) mc->gregs[REG_##X] = regs->gp[X/sizeof(unsigned long)]
	COPY(R8); COPY(R9); COPY(R10); COPY(R11);
	COPY(R12); COPY(R13); COPY(R14); COPY(R15);
	COPY(RDI); COPY(RSI); COPY(RBP); COPY(RBX);
	COPY(RDX); COPY(RAX); COPY(RCX); COPY(RSP);
	COPY(RIP);
	COPY2(EFLAGS, EFL);
	mc->gregs[REG_CSGSFS] = mc->gregs[REG_CSGSFS] & 0xffffffffffffl;
	mc->gregs[REG_CSGSFS] |= (regs->gp[SS / sizeof(unsigned long)] & 0xffff) << 48;
#endif

	if (single_stepping)
		mc->gregs[REG_EFL] |= X86_EFLAGS_TF;
	else
		mc->gregs[REG_EFL] &= ~X86_EFLAGS_TF;
}
#ifdef CONFIG_X86_32
struct _xstate_64 {
	struct _fpstate_64 fpstate;
	struct _header xstate_hdr;
	struct _ymmh_state ymmh;
	/* New processor state extensions go here: */
};

/* Not quite the right structures as these contain more information */
int um_i387_from_fxsr(struct _fpstate_32 *i387,
		      const struct _fpstate_64 *fxsave);
int um_fxsr_from_i387(struct _fpstate_64 *fxsave,
		      const struct _fpstate_32 *from);
#else
#define _xstate_64 _xstate
#endif

static struct _fpstate *get_fpstate(struct stub_data *data,
				    mcontext_t *mcontext,
				    int *fp_size)
{
	struct _fpstate *res;

	/* Assume floating point registers are on the same page */
	res = (void *)(((unsigned long)mcontext->fpregs &
			(UM_KERN_PAGE_SIZE - 1)) +
		       (unsigned long)&data->sigstack[0]);

	if ((void *)res + sizeof(struct _fpstate) >
	    (void *)data->sigstack + sizeof(data->sigstack))
		return NULL;

	if (res->sw_reserved.magic1 != FP_XSTATE_MAGIC1) {
		*fp_size = sizeof(struct _fpstate);
	} else {
		char *magic2_addr;

		magic2_addr = (void *)res;
		magic2_addr += res->sw_reserved.extended_size;
		magic2_addr -= FP_XSTATE_MAGIC2_SIZE;

		/* We still need to be within our stack */
		if ((void *)magic2_addr >
		    (void *)data->sigstack + sizeof(data->sigstack))
			return NULL;

		/* If we do not read MAGIC2, then we did something wrong */
		if (*(__u32 *)magic2_addr != FP_XSTATE_MAGIC2)
			return NULL;

		/* Remove MAGIC2 from the size, we do not save/restore it */
		*fp_size = res->sw_reserved.extended_size -
			   FP_XSTATE_MAGIC2_SIZE;
	}

	return res;
}
int get_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
		   unsigned long *fp_size_out)
{
	mcontext_t *mcontext;
	struct _fpstate *fpstate_stub;
	struct _xstate_64 *xstate_stub;
	int fp_size, xstate_size;

	/* mctx_offset is verified by wait_stub_done_seccomp */
	mcontext = (void *)&data->sigstack[data->mctx_offset];

	get_regs_from_mc(regs, mcontext);

	fpstate_stub = get_fpstate(data, mcontext, &fp_size);
	if (!fpstate_stub)
		return -EINVAL;

#ifdef CONFIG_X86_32
	xstate_stub = (void *)&fpstate_stub->_fxsr_env;
	xstate_size = fp_size - offsetof(struct _fpstate_32, _fxsr_env);
#else
	xstate_stub = (void *)fpstate_stub;
	xstate_size = fp_size;
#endif

	if (fp_size_out)
		*fp_size_out = xstate_size;

	if (xstate_size > host_fp_size)
		return -ENOSPC;

	memcpy(&regs->fp, xstate_stub, xstate_size);

	/* We do not need to read the x86_64 FS_BASE/GS_BASE registers as
	 * we do not permit userspace to set them directly.
	 */

#ifdef CONFIG_X86_32
	/* Read the i387 legacy FP registers */
	if (um_fxsr_from_i387((void *)&regs->fp, fpstate_stub))
		return -EINVAL;
#endif

	return 0;
}
/* Copied because we cannot include regset.h here. */
struct task_struct;
struct user_regset;
struct membuf {
	void *p;
	size_t left;
};

int fpregs_legacy_get(struct task_struct *target,
		      const struct user_regset *regset,
		      struct membuf to);
int set_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
		   int single_stepping)
{
	mcontext_t *mcontext;
	struct _fpstate *fpstate_stub;
	struct _xstate_64 *xstate_stub;
	int fp_size, xstate_size;

	/* mctx_offset is verified by wait_stub_done_seccomp */
	mcontext = (void *)&data->sigstack[data->mctx_offset];

	if ((unsigned long)mcontext < (unsigned long)data->sigstack ||
	    (unsigned long)mcontext >
		(unsigned long) data->sigstack +
		sizeof(data->sigstack) - sizeof(*mcontext))
		return -EINVAL;

	get_mc_from_regs(regs, mcontext, single_stepping);

	fpstate_stub = get_fpstate(data, mcontext, &fp_size);
	if (!fpstate_stub)
		return -EINVAL;

#ifdef CONFIG_X86_32
	xstate_stub = (void *)&fpstate_stub->_fxsr_env;
	xstate_size = fp_size - offsetof(struct _fpstate_32, _fxsr_env);
#else
	xstate_stub = (void *)fpstate_stub;
	xstate_size = fp_size;
#endif

	memcpy(xstate_stub, &regs->fp, xstate_size);

#ifdef __i386__
	/*
	 * On x86, the GDT entries are updated by arch_set_tls.
	 */

	/* Store the i387 legacy FP registers which the host will use */
	if (um_i387_from_fxsr(fpstate_stub, (void *)&regs->fp))
		return -EINVAL;
#else
	/*
	 * On x86_64, we need to sync the FS_BASE/GS_BASE registers using the
	 * arch specific data.
	 */
	if (data->arch_data.fs_base != regs->gp[FS_BASE / sizeof(unsigned long)]) {
		data->arch_data.fs_base = regs->gp[FS_BASE / sizeof(unsigned long)];
		data->arch_data.sync |= STUB_SYNC_FS_BASE;
	}
	if (data->arch_data.gs_base != regs->gp[GS_BASE / sizeof(unsigned long)]) {
		data->arch_data.gs_base = regs->gp[GS_BASE / sizeof(unsigned long)];
		data->arch_data.sync |= STUB_SYNC_GS_BASE;
	}
#endif

	return 0;
}
@@ -25,7 +25,8 @@ static inline unsigned short twd_i387_to_fxsr(unsigned short twd)
	return tmp;
}

static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
static inline unsigned long
twd_fxsr_to_i387(const struct user_fxsr_struct *fxsave)
{
	struct _fpxreg *st = NULL;
	unsigned long twd = (unsigned long) fxsave->twd;
@@ -69,12 +70,16 @@ static inline unsigned long twd_fxsr_to_i387(struct user_fxsr_struct *fxsave)
	return ret;
}

/* Get/set the old 32bit i387 registers (pre-FPX) */
static int fpregs_legacy_get(struct task_struct *target,
			     const struct user_regset *regset,
			     struct membuf to)
/*
 * Get/set the old 32bit i387 registers (pre-FPX)
 *
 * We provide simple wrappers for mcontext.c, they are only defined locally
 * because mcontext.c is userspace facing and needs a different definition
 * of the structures.
 */
static int _um_i387_from_fxsr(struct membuf to,
			      const struct user_fxsr_struct *fxsave)
{
	struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
	int i;

	membuf_store(&to, (unsigned long)fxsave->cwd | 0xffff0000ul);
@@ -91,23 +96,36 @@ static int fpregs_legacy_get(struct task_struct *target,
	return 0;
}

static int fpregs_legacy_set(struct task_struct *target,
int um_i387_from_fxsr(struct user_i387_struct *i387,
		      const struct user_fxsr_struct *fxsave);

int um_i387_from_fxsr(struct user_i387_struct *i387,
		      const struct user_fxsr_struct *fxsave)
{
	struct membuf to = {
		.p = i387,
		.left = sizeof(*i387),
	};

	return _um_i387_from_fxsr(to, fxsave);
}

static int fpregs_legacy_get(struct task_struct *target,
			     const struct user_regset *regset,
			     unsigned int pos, unsigned int count,
			     const void *kbuf, const void __user *ubuf)
			     struct membuf to)
{
	struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
	const struct user_i387_struct *from;
	struct user_i387_struct buf;
	int i;

	if (ubuf) {
		if (copy_from_user(&buf, ubuf, sizeof(buf)))
			return -EFAULT;
		from = &buf;
	} else {
		from = kbuf;
	}
	return _um_i387_from_fxsr(to, fxsave);
}

int um_fxsr_from_i387(struct user_fxsr_struct *fxsave,
		      const struct user_i387_struct *from);

int um_fxsr_from_i387(struct user_fxsr_struct *fxsave,
		      const struct user_i387_struct *from)
{
	int i;

	fxsave->cwd = (unsigned short)(from->cwd & 0xffff);
	fxsave->swd = (unsigned short)(from->swd & 0xffff);
@@ -125,6 +143,26 @@ static int fpregs_legacy_set(struct task_struct *target,

	return 0;
}

static int fpregs_legacy_set(struct task_struct *target,
			     const struct user_regset *regset,
			     unsigned int pos, unsigned int count,
			     const void *kbuf, const void __user *ubuf)
{
	struct user_fxsr_struct *fxsave = (void *)target->thread.regs.regs.fp;
	const struct user_i387_struct *from;
	struct user_i387_struct buf;

	if (ubuf) {
		if (copy_from_user(&buf, ubuf, sizeof(buf)))
			return -EFAULT;
		from = &buf;
	} else {
		from = kbuf;
	}

	return um_fxsr_from_i387(fxsave, &buf);
}
#endif

static int genregs_get(struct task_struct *target,
@@ -4,7 +4,9 @@
#include <linux/elf.h>
#include <linux/crypto.h>
#include <linux/kbuild.h>
#include <linux/audit.h>
#include <asm/mman.h>
#include <asm/seccomp.h>

/* workaround for a warning with -Wmissing-prototypes */
void foo(void);

@@ -6,7 +6,16 @@
#ifndef __SYS_SIGCONTEXT_X86_H
#define __SYS_SIGCONTEXT_X86_H

#include <stub-data.h>

extern void get_regs_from_mc(struct uml_pt_regs *, mcontext_t *);
extern void get_mc_from_regs(struct uml_pt_regs *regs, mcontext_t *mc,
			     int single_stepping);

extern int get_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
			  unsigned long *fp_size_out);
extern int set_stub_state(struct uml_pt_regs *regs, struct stub_data *data,
			  int single_stepping);

#ifdef __i386__
arch/x86/um/shared/sysdep/stub-data.h (new file, 23 lines)
@@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __ARCH_STUB_DATA_H
#define __ARCH_STUB_DATA_H

#ifdef __i386__
#include <generated/asm-offsets.h>
#include <asm/ldt.h>

struct stub_data_arch {
	int sync;
	struct user_desc tls[UM_KERN_GDT_ENTRY_TLS_ENTRIES];
};
#else
#define STUB_SYNC_FS_BASE (1 << 0)
#define STUB_SYNC_GS_BASE (1 << 1)
struct stub_data_arch {
	int sync;
	unsigned long fs_base;
	unsigned long gs_base;
};
#endif
#endif /* __ARCH_STUB_DATA_H */
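On x86_64 the sync field in this new header acts as a small dirty mask between the UML kernel and the stub: set_stub_state() earlier in this diff records a new FS_BASE/GS_BASE value and sets the matching bit, and stub_seccomp_restore_state() later in this diff applies it via arch_prctl() and clears the mask. A hedged sketch of the producer side, purely for illustration — this helper does not exist in the commit and would only build on the x86_64 branch of the #ifdef above:

/* Hypothetical helper (illustration only): queue an FS_BASE update so the
 * stub applies it with arch_prctl(ARCH_SET_FS) before resuming userspace. */
static inline void stub_queue_fs_base(struct stub_data_arch *arch,
				      unsigned long fs_base)
{
	arch->fs_base = fs_base;
	arch->sync |= STUB_SYNC_FS_BASE;
}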
@@ -13,3 +13,5 @@

extern void stub_segv_handler(int, siginfo_t *, void *);
extern void stub_syscall_handler(void);
extern void stub_signal_interrupt(int, siginfo_t *, void *);
extern void stub_signal_restorer(void);
@@ -131,4 +131,17 @@ static __always_inline void *get_stub_data(void)
		"call *%%eax ;" \
		:: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
		   "i" (&fn))

static __always_inline void
stub_seccomp_restore_state(struct stub_data_arch *arch)
{
	for (int i = 0; i < sizeof(arch->tls) / sizeof(arch->tls[0]); i++) {
		if (arch->sync & (1 << i))
			stub_syscall1(__NR_set_thread_area,
				      (unsigned long) &arch->tls[i]);
	}

	arch->sync = 0;
}

#endif
@@ -10,6 +10,7 @@
#include <sysdep/ptrace_user.h>
#include <generated/asm-offsets.h>
#include <linux/stddef.h>
#include <asm/prctl.h>

#define STUB_MMAP_NR __NR_mmap
#define MMAP_OFFSET(o) (o)
@@ -134,4 +135,20 @@ static __always_inline void *get_stub_data(void)
		"call *%%rax ;" \
		:: "i" ((1 + STUB_DATA_PAGES) * UM_KERN_PAGE_SIZE), \
		   "i" (&fn))

static __always_inline void
stub_seccomp_restore_state(struct stub_data_arch *arch)
{
	/*
	 * We could use _writefsbase_u64/_writegsbase_u64 if the host reports
	 * support in the hwcaps (HWCAP2_FSGSBASE).
	 */
	if (arch->sync & STUB_SYNC_FS_BASE)
		stub_syscall2(__NR_arch_prctl, ARCH_SET_FS, arch->fs_base);
	if (arch->sync & STUB_SYNC_GS_BASE)
		stub_syscall2(__NR_arch_prctl, ARCH_SET_GS, arch->gs_base);

	arch->sync = 0;
}

#endif
@@ -12,6 +12,7 @@
#include <skas.h>
#include <sysdep/tls.h>
#include <asm/desc.h>
#include <stub-data.h>

/*
 * If needed we can detect when it's uninitialized.
@@ -21,14 +22,25 @@
static int host_supports_tls = -1;
int host_gdt_entry_tls_min;

static int do_set_thread_area(struct user_desc *info)
static int do_set_thread_area(struct task_struct* task, struct user_desc *info)
{
	int ret;
	u32 cpu;

	cpu = get_cpu();
	ret = os_set_thread_area(info, userspace_pid[cpu]);
	put_cpu();
	if (info->entry_number < host_gdt_entry_tls_min ||
	    info->entry_number >= host_gdt_entry_tls_min + GDT_ENTRY_TLS_ENTRIES)
		return -EINVAL;

	if (using_seccomp) {
		int idx = info->entry_number - host_gdt_entry_tls_min;
		struct stub_data *data = (void *)task->mm->context.id.stack;

		data->arch_data.tls[idx] = *info;
		data->arch_data.sync |= BIT(idx);

		return 0;
	}

	ret = os_set_thread_area(info, task->mm->context.id.pid);

	if (ret)
		printk(KERN_ERR "PTRACE_SET_THREAD_AREA failed, err = %d, "
@@ -97,7 +109,7 @@ static int load_TLS(int flags, struct task_struct *to)
		if (!(flags & O_FORCE) && curr->flushed)
			continue;

		ret = do_set_thread_area(&curr->tls);
		ret = do_set_thread_area(current, &curr->tls);
		if (ret)
			goto out;
@@ -275,7 +287,7 @@ SYSCALL_DEFINE1(set_thread_area, struct user_desc __user *, user_desc)
		return -EFAULT;
	}

	ret = do_set_thread_area(&info);
	ret = do_set_thread_area(current, &info);
	if (ret)
		return ret;
	return set_tls_entry(current, &info, idx, 1);