linux/net/rxrpc/proc.c
David Howells a4ea4c4776 rxrpc: Don't use a ring buffer for call Tx queue
Change the way the Tx queueing works to make the following ends easier to
achieve:

 (1) The filling of packets, the encryption of packets and the transmission
     of packets can be handled in parallel by separate threads, rather than
     rxrpc_sendmsg() allocating, filling, encrypting and transmitting each
     packet before moving onto the next one.

 (2) Get rid of the fixed-size ring which sets a hard limit on the number
     of packets that can be retained in the ring.  This allows the number
     of packets to increase without having to allocate a very large ring or
     having variable-sized rings.

     [Note: the downside of this is that it's then less efficient to locate
     a packet for retransmission as we then have to step through a list and
     examine each buffer in the list.]

 (3) Allow the filler/encrypter to run ahead of the transmission window.

 (4) Make it easier to do zero copy UDP from the packet buffers.

 (5) Make it easier to do zero copy from userspace to the packet buffers -
     and thence to UDP (only if for unauthenticated connections).

To that end, the following changes are made:

 (1) Use the new rxrpc_txbuf struct instead of sk_buff for keeping packets
     to be transmitted in.  This allows them to be placed on multiple
     queues simultaneously.  An sk_buff isn't really necessary as it's
     never passed on to lower-level networking code.

 (2) Keep the transmissable packets in a linked list on the call struct
     rather than in a ring.  As a consequence, the annotation buffer isn't
     used either; rather a flag is set on the packet to indicate ackedness.

 (3) Use the RXRPC_CALL_TX_LAST flag to indicate that the last packet to be
     transmitted has been queued.  Add RXRPC_CALL_TX_ALL_ACKED to indicate
     that all packets up to and including the last got hard acked.

 (4) Wire headers are now stored in the txbuf rather than being concocted
     on the stack and they're stored immediately before the data, thereby
     allowing zerocopy of a single span.

 (5) Don't bother with instant-resend on transmission failure; rather,
     leave it for a timer or an ACK packet to trigger.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
2022-11-08 16:42:28 +00:00

495 lines
13 KiB
C

// SPDX-License-Identifier: GPL-2.0-or-later
/* /proc/net/ support for AF_RXRPC
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*/
#include <linux/module.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
[RXRPC_CONN_UNUSED] = "Unused ",
[RXRPC_CONN_CLIENT] = "Client ",
[RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc",
[RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ",
[RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ",
[RXRPC_CONN_SERVICE] = "SvSecure",
[RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
[RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
};
/*
* generate a list of extant and dead calls in /proc/net/rxrpc_calls
*/
static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
__acquires(rcu)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
rcu_read_lock();
return seq_list_start_head_rcu(&rxnet->calls, *_pos);
}
static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
return seq_list_next_rcu(v, &rxnet->calls, pos);
}
static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
__releases(rcu)
{
rcu_read_unlock();
}
static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_local *local;
struct rxrpc_sock *rx;
struct rxrpc_peer *peer;
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
unsigned long timeout = 0;
rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
u64 wtmp;
if (v == &rxnet->calls) {
seq_puts(seq,
"Proto Local "
" Remote "
" SvID ConnID CallID End Use State Abort "
" DebugId TxSeq TW RxSeq RW RxSerial RxTimo\n");
return 0;
}
call = list_entry(v, struct rxrpc_call, link);
rx = rcu_dereference(call->socket);
if (rx) {
local = READ_ONCE(rx->local);
if (local)
sprintf(lbuff, "%pISpc", &local->srx.transport);
else
strcpy(lbuff, "no_local");
} else {
strcpy(lbuff, "no_socket");
}
peer = call->peer;
if (peer)
sprintf(rbuff, "%pISpc", &peer->srx.transport);
else
strcpy(rbuff, "no_connection");
if (call->state != RXRPC_CALL_SERVER_PREALLOC) {
timeout = READ_ONCE(call->expect_rx_by);
timeout -= jiffies;
}
acks_hard_ack = READ_ONCE(call->acks_hard_ack);
wtmp = atomic64_read_acquire(&call->ackr_window);
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
" %-8.8s %08x %08x %08x %02x %08x %02x %08x %06lx\n",
lbuff,
rbuff,
call->service_id,
call->cid,
call->call_id,
rxrpc_is_service_call(call) ? "Svc" : "Clt",
refcount_read(&call->ref),
rxrpc_call_states[call->state],
call->abort_code,
call->debug_id,
acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp),
call->rx_serial,
timeout);
return 0;
}
const struct seq_operations rxrpc_call_seq_ops = {
.start = rxrpc_call_seq_start,
.next = rxrpc_call_seq_next,
.stop = rxrpc_call_seq_stop,
.show = rxrpc_call_seq_show,
};
/*
* generate a list of extant virtual connections in /proc/net/rxrpc_conns
*/
static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
__acquires(rxnet->conn_lock)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
read_lock(&rxnet->conn_lock);
return seq_list_start_head(&rxnet->conn_proc_list, *_pos);
}
static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
loff_t *pos)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
return seq_list_next(v, &rxnet->conn_proc_list, pos);
}
static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
__releases(rxnet->conn_lock)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
read_unlock(&rxnet->conn_lock);
}
static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_connection *conn;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
char lbuff[50], rbuff[50];
if (v == &rxnet->conn_proc_list) {
seq_puts(seq,
"Proto Local "
" Remote "
" SvID ConnID End Use State Key "
" Serial ISerial CallId0 CallId1 CallId2 CallId3\n"
);
return 0;
}
conn = list_entry(v, struct rxrpc_connection, proc_link);
if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) {
strcpy(lbuff, "no_local");
strcpy(rbuff, "no_connection");
goto print;
}
sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport);
sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport);
print:
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %s %3u"
" %s %08x %08x %08x %08x %08x %08x %08x\n",
lbuff,
rbuff,
conn->service_id,
conn->proto.cid,
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
refcount_read(&conn->ref),
rxrpc_conn_states[conn->state],
key_serial(conn->params.key),
atomic_read(&conn->serial),
conn->hi_serial,
conn->channels[0].call_id,
conn->channels[1].call_id,
conn->channels[2].call_id,
conn->channels[3].call_id);
return 0;
}
const struct seq_operations rxrpc_connection_seq_ops = {
.start = rxrpc_connection_seq_start,
.next = rxrpc_connection_seq_next,
.stop = rxrpc_connection_seq_stop,
.show = rxrpc_connection_seq_show,
};
/*
* generate a list of extant virtual peers in /proc/net/rxrpc/peers
*/
static int rxrpc_peer_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_peer *peer;
time64_t now;
char lbuff[50], rbuff[50];
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Proto Local "
" Remote "
" Use CW MTU LastUse RTT RTO\n"
);
return 0;
}
peer = list_entry(v, struct rxrpc_peer, hash_link);
sprintf(lbuff, "%pISpc", &peer->local->srx.transport);
sprintf(rbuff, "%pISpc", &peer->srx.transport);
now = ktime_get_seconds();
seq_printf(seq,
"UDP %-47.47s %-47.47s %3u"
" %3u %5u %6llus %8u %8u\n",
lbuff,
rbuff,
refcount_read(&peer->ref),
peer->cong_cwnd,
peer->mtu,
now - peer->last_tx_at,
peer->srtt_us >> 3,
jiffies_to_usecs(peer->rto_j));
return 0;
}
static void *rxrpc_peer_seq_start(struct seq_file *seq, loff_t *_pos)
__acquires(rcu)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
unsigned int bucket, n;
unsigned int shift = 32 - HASH_BITS(rxnet->peer_hash);
void *p;
rcu_read_lock();
if (*_pos >= UINT_MAX)
return NULL;
n = *_pos & ((1U << shift) - 1);
bucket = *_pos >> shift;
for (;;) {
if (bucket >= HASH_SIZE(rxnet->peer_hash)) {
*_pos = UINT_MAX;
return NULL;
}
if (n == 0) {
if (bucket == 0)
return SEQ_START_TOKEN;
*_pos += 1;
n++;
}
p = seq_hlist_start_rcu(&rxnet->peer_hash[bucket], n - 1);
if (p)
return p;
bucket++;
n = 1;
*_pos = (bucket << shift) | n;
}
}
static void *rxrpc_peer_seq_next(struct seq_file *seq, void *v, loff_t *_pos)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
unsigned int bucket, n;
unsigned int shift = 32 - HASH_BITS(rxnet->peer_hash);
void *p;
if (*_pos >= UINT_MAX)
return NULL;
bucket = *_pos >> shift;
p = seq_hlist_next_rcu(v, &rxnet->peer_hash[bucket], _pos);
if (p)
return p;
for (;;) {
bucket++;
n = 1;
*_pos = (bucket << shift) | n;
if (bucket >= HASH_SIZE(rxnet->peer_hash)) {
*_pos = UINT_MAX;
return NULL;
}
if (n == 0) {
*_pos += 1;
n++;
}
p = seq_hlist_start_rcu(&rxnet->peer_hash[bucket], n - 1);
if (p)
return p;
}
}
static void rxrpc_peer_seq_stop(struct seq_file *seq, void *v)
__releases(rcu)
{
rcu_read_unlock();
}
const struct seq_operations rxrpc_peer_seq_ops = {
.start = rxrpc_peer_seq_start,
.next = rxrpc_peer_seq_next,
.stop = rxrpc_peer_seq_stop,
.show = rxrpc_peer_seq_show,
};
/*
* Generate a list of extant virtual local endpoints in /proc/net/rxrpc/locals
*/
static int rxrpc_local_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_local *local;
char lbuff[50];
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Proto Local "
" Use Act\n");
return 0;
}
local = hlist_entry(v, struct rxrpc_local, link);
sprintf(lbuff, "%pISpc", &local->srx.transport);
seq_printf(seq,
"UDP %-47.47s %3u %3u\n",
lbuff,
refcount_read(&local->ref),
atomic_read(&local->active_users));
return 0;
}
static void *rxrpc_local_seq_start(struct seq_file *seq, loff_t *_pos)
__acquires(rcu)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
unsigned int n;
rcu_read_lock();
if (*_pos >= UINT_MAX)
return NULL;
n = *_pos;
if (n == 0)
return SEQ_START_TOKEN;
return seq_hlist_start_rcu(&rxnet->local_endpoints, n - 1);
}
static void *rxrpc_local_seq_next(struct seq_file *seq, void *v, loff_t *_pos)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
if (*_pos >= UINT_MAX)
return NULL;
return seq_hlist_next_rcu(v, &rxnet->local_endpoints, _pos);
}
static void rxrpc_local_seq_stop(struct seq_file *seq, void *v)
__releases(rcu)
{
rcu_read_unlock();
}
const struct seq_operations rxrpc_local_seq_ops = {
.start = rxrpc_local_seq_start,
.next = rxrpc_local_seq_next,
.stop = rxrpc_local_seq_stop,
.show = rxrpc_local_seq_show,
};
/*
* Display stats in /proc/net/rxrpc/stats
*/
int rxrpc_stats_show(struct seq_file *seq, void *v)
{
struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(seq));
seq_printf(seq,
"Data : send=%u sendf=%u\n",
atomic_read(&rxnet->stat_tx_data_send),
atomic_read(&rxnet->stat_tx_data_send_frag));
seq_printf(seq,
"Data-Tx : nr=%u retrans=%u\n",
atomic_read(&rxnet->stat_tx_data),
atomic_read(&rxnet->stat_tx_data_retrans));
seq_printf(seq,
"Data-Rx : nr=%u reqack=%u jumbo=%u\n",
atomic_read(&rxnet->stat_rx_data),
atomic_read(&rxnet->stat_rx_data_reqack),
atomic_read(&rxnet->stat_rx_data_jumbo));
seq_printf(seq,
"Ack : fill=%u send=%u skip=%u\n",
atomic_read(&rxnet->stat_tx_ack_fill),
atomic_read(&rxnet->stat_tx_ack_send),
atomic_read(&rxnet->stat_tx_ack_skip));
seq_printf(seq,
"Ack-Tx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u\n",
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_REQUESTED]),
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_DUPLICATE]),
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_OUT_OF_SEQUENCE]),
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_EXCEEDS_WINDOW]),
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_NOSPACE]),
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_PING]),
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_PING_RESPONSE]),
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_DELAY]),
atomic_read(&rxnet->stat_tx_acks[RXRPC_ACK_IDLE]));
seq_printf(seq,
"Ack-Rx : req=%u dup=%u oos=%u exw=%u nos=%u png=%u prs=%u dly=%u idl=%u\n",
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_REQUESTED]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DUPLICATE]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_OUT_OF_SEQUENCE]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_EXCEEDS_WINDOW]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_NOSPACE]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_PING_RESPONSE]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_DELAY]),
atomic_read(&rxnet->stat_rx_acks[RXRPC_ACK_IDLE]));
seq_printf(seq,
"Why-Req-A: acklost=%u already=%u mrtt=%u ortt=%u\n",
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_ack_lost]),
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_already_on]),
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_more_rtt]),
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_old_rtt]));
seq_printf(seq,
"Why-Req-A: nolast=%u retx=%u slows=%u smtxw=%u\n",
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_no_srv_last]),
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_retrans]),
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_slow_start]),
atomic_read(&rxnet->stat_why_req_ack[rxrpc_reqack_small_txwin]));
seq_printf(seq,
"Buffers : txb=%u rxb=%u\n",
atomic_read(&rxrpc_nr_txbuf),
atomic_read(&rxrpc_n_rx_skbs));
return 0;
}
/*
* Clear stats if /proc/net/rxrpc/stats is written to.
*/
int rxrpc_stats_clear(struct file *file, char *buf, size_t size)
{
struct seq_file *m = file->private_data;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_single_net(m));
if (size > 1 || (size == 1 && buf[0] != '\n'))
return -EINVAL;
atomic_set(&rxnet->stat_tx_data, 0);
atomic_set(&rxnet->stat_tx_data_retrans, 0);
atomic_set(&rxnet->stat_tx_data_send, 0);
atomic_set(&rxnet->stat_tx_data_send_frag, 0);
atomic_set(&rxnet->stat_rx_data, 0);
atomic_set(&rxnet->stat_rx_data_reqack, 0);
atomic_set(&rxnet->stat_rx_data_jumbo, 0);
atomic_set(&rxnet->stat_tx_ack_fill, 0);
atomic_set(&rxnet->stat_tx_ack_send, 0);
atomic_set(&rxnet->stat_tx_ack_skip, 0);
memset(&rxnet->stat_tx_acks, 0, sizeof(rxnet->stat_tx_acks));
memset(&rxnet->stat_rx_acks, 0, sizeof(rxnet->stat_rx_acks));
memset(&rxnet->stat_why_req_ack, 0, sizeof(rxnet->stat_why_req_ack));
return size;
}