mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-09-18 22:14:16 +00:00
Merge branch 'mptcp-expose-more-info-and-small-improvements'
Matthieu Baerts says:
====================
mptcp: expose more info and small improvements
Patch 1-3/9 track and expose some aggregated data counters at the MPTCP
level: the number of retransmissions and the bytes that have been
transferred. The first patch prepares the work by moving where snd_una
is updated for fallback sockets while the last patch adds some tests to
cover the new code.
Patch 4-6/9 introduce a new getsockopt for SOL_MPTCP: MPTCP_FULL_INFO.
This new socket option allows to combine info from MPTCP_INFO,
MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS socket options into one. It can be
needed to have all info in one because the path-manager can close and
re-create subflows between getsockopt() and fooling the accounting. The
first patch introduces a unique subflow ID to easily detect when
subflows are being re-created with the same 5-tuple while the last patch
adds some tests to cover the new code.
Please note that patch 5/9 ("mptcp: introduce MPTCP_FULL_INFO getsockopt")
can reveal a bug that were there for a bit of time, see [1]. A fix has
recently been fixed to netdev for the -net tree: "mptcp: ensure listener
is unhashed before updating the sk status", see [2]. There is no
conflicts between the two patches but it might be better to apply this
series after the one for -net and after having merged "net" into
"net-next".
Patch 7/9 is similar to commit 47867f0a7e
("selftests: mptcp: join:
skip check if MIB counter not supported") recently applied in the -net
tree but here it adapts the new code that is only in net-next (and it
fixes a merge conflict resolution which didn't have any impact).
Patch 8 and 9/9 are two simple refactoring. One to consolidate the
transition to TCP_CLOSE in mptcp_do_fastclose() and avoid duplicated
code. The other one reduces the scope of an argument passed to
mptcp_pm_alloc_anno_list() function.
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/407 [1]
Link: https://lore.kernel.org/netdev/20230620-upstream-net-20230620-misc-fixes-for-v6-4-v1-0-f36aa5eae8b9@tessares.net/ [2]
====================
Link: https://lore.kernel.org/r/20230620-upstream-net-next-20230620-mptcp-expose-more-info-and-misc-v1-0-62b9444bfd48@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
This commit is contained in:
commit
98e95872f2
10 changed files with 356 additions and 46 deletions
|
@ -123,6 +123,11 @@ struct mptcp_info {
|
|||
__u8 mptcpi_local_addr_used;
|
||||
__u8 mptcpi_local_addr_max;
|
||||
__u8 mptcpi_csum_enabled;
|
||||
__u32 mptcpi_retransmits;
|
||||
__u64 mptcpi_bytes_retrans;
|
||||
__u64 mptcpi_bytes_sent;
|
||||
__u64 mptcpi_bytes_received;
|
||||
__u64 mptcpi_bytes_acked;
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -244,9 +249,33 @@ struct mptcp_subflow_addrs {
|
|||
};
|
||||
};
|
||||
|
||||
struct mptcp_subflow_info {
|
||||
__u32 id;
|
||||
struct mptcp_subflow_addrs addrs;
|
||||
};
|
||||
|
||||
struct mptcp_full_info {
|
||||
__u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
|
||||
__u32 size_tcpinfo_user;
|
||||
__u32 size_sfinfo_kernel; /* must be 0, set by kernel */
|
||||
__u32 size_sfinfo_user;
|
||||
__u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
|
||||
__u32 size_arrays_user; /* max subflows that userspace is interested in;
|
||||
* the buffers at subflow_info/tcp_info
|
||||
* are respectively at least:
|
||||
* size_arrays * size_sfinfo_user
|
||||
* size_arrays * size_tcpinfo_user
|
||||
* bytes wide
|
||||
*/
|
||||
__aligned_u64 subflow_info;
|
||||
__aligned_u64 tcp_info;
|
||||
struct mptcp_info mptcp_info;
|
||||
};
|
||||
|
||||
/* MPTCP socket options */
|
||||
#define MPTCP_INFO 1
|
||||
#define MPTCP_TCPINFO 2
|
||||
#define MPTCP_SUBFLOW_ADDRS 3
|
||||
#define MPTCP_FULL_INFO 4
|
||||
|
||||
#endif /* _UAPI_MPTCP_H */
|
||||
|
|
|
@ -1026,6 +1026,12 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq)
|
|||
return cur_seq;
|
||||
}
|
||||
|
||||
static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una)
|
||||
{
|
||||
msk->bytes_acked += new_snd_una - msk->snd_una;
|
||||
msk->snd_una = new_snd_una;
|
||||
}
|
||||
|
||||
static void ack_update_msk(struct mptcp_sock *msk,
|
||||
struct sock *ssk,
|
||||
struct mptcp_options_received *mp_opt)
|
||||
|
@ -1057,7 +1063,7 @@ static void ack_update_msk(struct mptcp_sock *msk,
|
|||
__mptcp_check_push(sk, ssk);
|
||||
|
||||
if (after64(new_snd_una, old_snd_una)) {
|
||||
msk->snd_una = new_snd_una;
|
||||
__mptcp_snd_una_update(msk, new_snd_una);
|
||||
__mptcp_data_acked(sk);
|
||||
}
|
||||
mptcp_data_unlock(sk);
|
||||
|
@ -1119,6 +1125,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb)
|
|||
mptcp_data_lock(subflow->conn);
|
||||
if (sk_stream_memory_free(sk))
|
||||
__mptcp_check_push(subflow->conn, sk);
|
||||
|
||||
/* on fallback we just need to ignore the msk-level snd_una, as
|
||||
* this is really plain TCP
|
||||
*/
|
||||
__mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt));
|
||||
|
||||
__mptcp_data_acked(subflow->conn);
|
||||
mptcp_data_unlock(subflow->conn);
|
||||
return true;
|
||||
|
|
|
@ -341,7 +341,7 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
|
|||
}
|
||||
|
||||
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
|
||||
const struct mptcp_pm_addr_entry *entry)
|
||||
const struct mptcp_addr_info *addr)
|
||||
{
|
||||
struct mptcp_pm_add_entry *add_entry = NULL;
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
|
@ -349,7 +349,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
|
|||
|
||||
lockdep_assert_held(&msk->pm.lock);
|
||||
|
||||
add_entry = mptcp_lookup_anno_list_by_saddr(msk, &entry->addr);
|
||||
add_entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
|
||||
|
||||
if (add_entry) {
|
||||
if (mptcp_pm_is_kernel(msk))
|
||||
|
@ -366,7 +366,7 @@ bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
|
|||
|
||||
list_add(&add_entry->list, &msk->pm.anno_list);
|
||||
|
||||
add_entry->addr = entry->addr;
|
||||
add_entry->addr = *addr;
|
||||
add_entry->sock = msk;
|
||||
add_entry->retrans_times = 0;
|
||||
|
||||
|
@ -576,7 +576,7 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
|
|||
return;
|
||||
|
||||
if (local) {
|
||||
if (mptcp_pm_alloc_anno_list(msk, local)) {
|
||||
if (mptcp_pm_alloc_anno_list(msk, &local->addr)) {
|
||||
__clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
|
||||
msk->pm.add_addr_signaled++;
|
||||
mptcp_pm_announce_addr(msk, &local->addr, false);
|
||||
|
|
|
@ -193,7 +193,7 @@ int mptcp_nl_cmd_announce(struct sk_buff *skb, struct genl_info *info)
|
|||
lock_sock((struct sock *)msk);
|
||||
spin_lock_bh(&msk->pm.lock);
|
||||
|
||||
if (mptcp_pm_alloc_anno_list(msk, &addr_val)) {
|
||||
if (mptcp_pm_alloc_anno_list(msk, &addr_val.addr)) {
|
||||
msk->pm.add_addr_signaled++;
|
||||
mptcp_pm_announce_addr(msk, &addr_val.addr, false);
|
||||
mptcp_pm_nl_addr_send_ack(msk);
|
||||
|
|
|
@ -96,6 +96,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
|
|||
list_add(&subflow->node, &msk->conn_list);
|
||||
sock_hold(ssock->sk);
|
||||
subflow->request_mptcp = 1;
|
||||
subflow->subflow_id = msk->subflow_id++;
|
||||
|
||||
/* This is the first subflow, always with id 0 */
|
||||
subflow->local_id_valid = 1;
|
||||
|
@ -377,6 +378,7 @@ static bool __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
|
|||
|
||||
if (MPTCP_SKB_CB(skb)->map_seq == msk->ack_seq) {
|
||||
/* in sequence */
|
||||
msk->bytes_received += copy_len;
|
||||
WRITE_ONCE(msk->ack_seq, msk->ack_seq + copy_len);
|
||||
tail = skb_peek_tail(&sk->sk_receive_queue);
|
||||
if (tail && mptcp_try_coalesce(sk, tail, skb))
|
||||
|
@ -760,6 +762,7 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
|
|||
MPTCP_SKB_CB(skb)->map_seq += delta;
|
||||
__skb_queue_tail(&sk->sk_receive_queue, skb);
|
||||
}
|
||||
msk->bytes_received += end_seq - msk->ack_seq;
|
||||
msk->ack_seq = end_seq;
|
||||
moved = true;
|
||||
}
|
||||
|
@ -845,6 +848,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
|
|||
if (sk->sk_socket && !ssk->sk_socket)
|
||||
mptcp_sock_graft(ssk, sk->sk_socket);
|
||||
|
||||
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
|
||||
mptcp_sockopt_sync_locked(msk, ssk);
|
||||
mptcp_subflow_joined(msk, ssk);
|
||||
return true;
|
||||
|
@ -1004,12 +1008,6 @@ static void __mptcp_clean_una(struct sock *sk)
|
|||
struct mptcp_data_frag *dtmp, *dfrag;
|
||||
u64 snd_una;
|
||||
|
||||
/* on fallback we just need to ignore snd_una, as this is really
|
||||
* plain TCP
|
||||
*/
|
||||
if (__mptcp_check_fallback(msk))
|
||||
msk->snd_una = READ_ONCE(msk->snd_nxt);
|
||||
|
||||
snd_una = msk->snd_una;
|
||||
list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) {
|
||||
if (after64(dfrag->data_seq + dfrag->data_len, snd_una))
|
||||
|
@ -1537,8 +1535,10 @@ static void mptcp_update_post_push(struct mptcp_sock *msk,
|
|||
* that has been handed to the subflow for transmission
|
||||
* and skip update in case it was old dfrag.
|
||||
*/
|
||||
if (likely(after64(snd_nxt_new, msk->snd_nxt)))
|
||||
if (likely(after64(snd_nxt_new, msk->snd_nxt))) {
|
||||
msk->bytes_sent += snd_nxt_new - msk->snd_nxt;
|
||||
msk->snd_nxt = snd_nxt_new;
|
||||
}
|
||||
}
|
||||
|
||||
void mptcp_check_and_set_pending(struct sock *sk)
|
||||
|
@ -2596,6 +2596,7 @@ static void __mptcp_retrans(struct sock *sk)
|
|||
}
|
||||
if (copied) {
|
||||
dfrag->already_sent = max(dfrag->already_sent, info.sent);
|
||||
msk->bytes_retrans += copied;
|
||||
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
|
||||
info.size_goal);
|
||||
WRITE_ONCE(msk->allow_infinite_fallback, false);
|
||||
|
@ -2654,6 +2655,7 @@ static void mptcp_do_fastclose(struct sock *sk)
|
|||
struct mptcp_subflow_context *subflow, *tmp;
|
||||
struct mptcp_sock *msk = mptcp_sk(sk);
|
||||
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
mptcp_for_each_subflow_safe(msk, subflow, tmp)
|
||||
__mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow),
|
||||
subflow, MPTCP_CF_FASTCLOSE);
|
||||
|
@ -2691,10 +2693,9 @@ static void mptcp_worker(struct work_struct *work)
|
|||
* even if it is orphaned and in FIN_WAIT2 state
|
||||
*/
|
||||
if (sock_flag(sk, SOCK_DEAD)) {
|
||||
if (mptcp_should_close(sk)) {
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
if (mptcp_should_close(sk))
|
||||
mptcp_do_fastclose(sk);
|
||||
}
|
||||
|
||||
if (sk->sk_state == TCP_CLOSE) {
|
||||
__mptcp_destroy_sock(sk);
|
||||
goto unlock;
|
||||
|
@ -2733,6 +2734,7 @@ static int __mptcp_init_sock(struct sock *sk)
|
|||
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
|
||||
WRITE_ONCE(msk->allow_infinite_fallback, true);
|
||||
msk->recovery = false;
|
||||
msk->subflow_id = 1;
|
||||
|
||||
mptcp_pm_data_init(msk);
|
||||
|
||||
|
@ -2936,7 +2938,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
|
|||
void __mptcp_unaccepted_force_close(struct sock *sk)
|
||||
{
|
||||
sock_set_flag(sk, SOCK_DEAD);
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
mptcp_do_fastclose(sk);
|
||||
__mptcp_destroy_sock(sk);
|
||||
}
|
||||
|
@ -2978,7 +2979,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
|
|||
/* If the msk has read data, or the caller explicitly ask it,
|
||||
* do the MPTCP equivalent of TCP reset, aka MPTCP fastclose
|
||||
*/
|
||||
inet_sk_state_store(sk, TCP_CLOSE);
|
||||
mptcp_do_fastclose(sk);
|
||||
timeout = 0;
|
||||
} else if (mptcp_close_state(sk)) {
|
||||
|
@ -3108,6 +3108,10 @@ static int mptcp_disconnect(struct sock *sk, int flags)
|
|||
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
|
||||
mptcp_pm_data_reset(msk);
|
||||
mptcp_ca_reset(sk);
|
||||
msk->bytes_acked = 0;
|
||||
msk->bytes_received = 0;
|
||||
msk->bytes_sent = 0;
|
||||
msk->bytes_retrans = 0;
|
||||
|
||||
WRITE_ONCE(sk->sk_shutdown, 0);
|
||||
sk_error_report(sk);
|
||||
|
@ -3157,6 +3161,9 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk,
|
|||
msk->wnd_end = msk->snd_nxt + req->rsk_rcv_wnd;
|
||||
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
|
||||
|
||||
/* passive msk is created after the first/MPC subflow */
|
||||
msk->subflow_id = 2;
|
||||
|
||||
sock_reset_flag(nsk, SOCK_RCU_FREE);
|
||||
security_inet_csk_clone(nsk, req);
|
||||
|
||||
|
|
|
@ -262,10 +262,13 @@ struct mptcp_sock {
|
|||
u64 local_key;
|
||||
u64 remote_key;
|
||||
u64 write_seq;
|
||||
u64 bytes_sent;
|
||||
u64 snd_nxt;
|
||||
u64 bytes_received;
|
||||
u64 ack_seq;
|
||||
atomic64_t rcv_wnd_sent;
|
||||
u64 rcv_data_fin_seq;
|
||||
u64 bytes_retrans;
|
||||
int rmem_fwd_alloc;
|
||||
struct sock *last_snd;
|
||||
int snd_burst;
|
||||
|
@ -274,6 +277,7 @@ struct mptcp_sock {
|
|||
* recovery related fields are under data_lock
|
||||
* protection
|
||||
*/
|
||||
u64 bytes_acked;
|
||||
u64 snd_una;
|
||||
u64 wnd_end;
|
||||
unsigned long timer_ival;
|
||||
|
@ -319,7 +323,8 @@ struct mptcp_sock {
|
|||
u64 rtt_us; /* last maximum rtt of subflows */
|
||||
} rcvq_space;
|
||||
|
||||
u32 setsockopt_seq;
|
||||
u32 subflow_id;
|
||||
u32 setsockopt_seq;
|
||||
char ca_name[TCP_CA_NAME_MAX];
|
||||
struct mptcp_sock *dl_next;
|
||||
};
|
||||
|
@ -500,6 +505,8 @@ struct mptcp_subflow_context {
|
|||
u8 reset_reason:4;
|
||||
u8 stale_count;
|
||||
|
||||
u32 subflow_id;
|
||||
|
||||
long delegated_status;
|
||||
unsigned long fail_tout;
|
||||
|
||||
|
@ -810,7 +817,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
|
|||
struct mptcp_addr_info *rem,
|
||||
u8 bkup);
|
||||
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
|
||||
const struct mptcp_pm_addr_entry *entry);
|
||||
const struct mptcp_addr_info *addr);
|
||||
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
|
||||
bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk);
|
||||
struct mptcp_pm_add_entry *
|
||||
|
|
|
@ -14,7 +14,8 @@
|
|||
#include <net/mptcp.h>
|
||||
#include "protocol.h"
|
||||
|
||||
#define MIN_INFO_OPTLEN_SIZE 16
|
||||
#define MIN_INFO_OPTLEN_SIZE 16
|
||||
#define MIN_FULL_INFO_OPTLEN_SIZE 40
|
||||
|
||||
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
|
||||
{
|
||||
|
@ -889,7 +890,9 @@ out:
|
|||
|
||||
void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
|
||||
{
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
u32 flags = 0;
|
||||
bool slow;
|
||||
|
||||
memset(info, 0, sizeof(*info));
|
||||
|
||||
|
@ -898,6 +901,9 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
|
|||
info->mptcpi_add_addr_accepted = READ_ONCE(msk->pm.add_addr_accepted);
|
||||
info->mptcpi_local_addr_used = READ_ONCE(msk->pm.local_addr_used);
|
||||
|
||||
if (inet_sk_state_load(sk) == TCP_LISTEN)
|
||||
return;
|
||||
|
||||
/* The following limits only make sense for the in-kernel PM */
|
||||
if (mptcp_pm_is_kernel(msk)) {
|
||||
info->mptcpi_subflows_max =
|
||||
|
@ -915,11 +921,21 @@ void mptcp_diag_fill_info(struct mptcp_sock *msk, struct mptcp_info *info)
|
|||
if (READ_ONCE(msk->can_ack))
|
||||
flags |= MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED;
|
||||
info->mptcpi_flags = flags;
|
||||
info->mptcpi_token = READ_ONCE(msk->token);
|
||||
info->mptcpi_write_seq = READ_ONCE(msk->write_seq);
|
||||
info->mptcpi_snd_una = READ_ONCE(msk->snd_una);
|
||||
info->mptcpi_rcv_nxt = READ_ONCE(msk->ack_seq);
|
||||
info->mptcpi_csum_enabled = READ_ONCE(msk->csum_enabled);
|
||||
mptcp_data_lock(sk);
|
||||
info->mptcpi_snd_una = msk->snd_una;
|
||||
info->mptcpi_rcv_nxt = msk->ack_seq;
|
||||
info->mptcpi_bytes_acked = msk->bytes_acked;
|
||||
mptcp_data_unlock(sk);
|
||||
|
||||
slow = lock_sock_fast(sk);
|
||||
info->mptcpi_csum_enabled = msk->csum_enabled;
|
||||
info->mptcpi_token = msk->token;
|
||||
info->mptcpi_write_seq = msk->write_seq;
|
||||
info->mptcpi_retransmits = inet_csk(sk)->icsk_retransmits;
|
||||
info->mptcpi_bytes_sent = msk->bytes_sent;
|
||||
info->mptcpi_bytes_received = msk->bytes_received;
|
||||
info->mptcpi_bytes_retrans = msk->bytes_retrans;
|
||||
unlock_sock_fast(sk, slow);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(mptcp_diag_fill_info);
|
||||
|
||||
|
@ -966,7 +982,8 @@ static int mptcp_put_subflow_data(struct mptcp_subflow_data *sfd,
|
|||
}
|
||||
|
||||
static int mptcp_get_subflow_data(struct mptcp_subflow_data *sfd,
|
||||
char __user *optval, int __user *optlen)
|
||||
char __user *optval,
|
||||
int __user *optlen)
|
||||
{
|
||||
int len, copylen;
|
||||
|
||||
|
@ -1147,6 +1164,125 @@ static int mptcp_getsockopt_subflow_addrs(struct mptcp_sock *msk, char __user *o
|
|||
return 0;
|
||||
}
|
||||
|
||||
static int mptcp_get_full_info(struct mptcp_full_info *mfi,
|
||||
char __user *optval,
|
||||
int __user *optlen)
|
||||
{
|
||||
int len;
|
||||
|
||||
BUILD_BUG_ON(offsetof(struct mptcp_full_info, mptcp_info) !=
|
||||
MIN_FULL_INFO_OPTLEN_SIZE);
|
||||
|
||||
if (get_user(len, optlen))
|
||||
return -EFAULT;
|
||||
|
||||
if (len < MIN_FULL_INFO_OPTLEN_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
memset(mfi, 0, sizeof(*mfi));
|
||||
if (copy_from_user(mfi, optval, MIN_FULL_INFO_OPTLEN_SIZE))
|
||||
return -EFAULT;
|
||||
|
||||
if (mfi->size_tcpinfo_kernel ||
|
||||
mfi->size_sfinfo_kernel ||
|
||||
mfi->num_subflows)
|
||||
return -EINVAL;
|
||||
|
||||
if (mfi->size_sfinfo_user > INT_MAX ||
|
||||
mfi->size_tcpinfo_user > INT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
return len - MIN_FULL_INFO_OPTLEN_SIZE;
|
||||
}
|
||||
|
||||
static int mptcp_put_full_info(struct mptcp_full_info *mfi,
|
||||
char __user *optval,
|
||||
u32 copylen,
|
||||
int __user *optlen)
|
||||
{
|
||||
copylen += MIN_FULL_INFO_OPTLEN_SIZE;
|
||||
if (put_user(copylen, optlen))
|
||||
return -EFAULT;
|
||||
|
||||
if (copy_to_user(optval, mfi, copylen))
|
||||
return -EFAULT;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int mptcp_getsockopt_full_info(struct mptcp_sock *msk, char __user *optval,
|
||||
int __user *optlen)
|
||||
{
|
||||
unsigned int sfcount = 0, copylen = 0;
|
||||
struct mptcp_subflow_context *subflow;
|
||||
struct sock *sk = (struct sock *)msk;
|
||||
void __user *tcpinfoptr, *sfinfoptr;
|
||||
struct mptcp_full_info mfi;
|
||||
int len;
|
||||
|
||||
len = mptcp_get_full_info(&mfi, optval, optlen);
|
||||
if (len < 0)
|
||||
return len;
|
||||
|
||||
/* don't bother filling the mptcp info if there is not enough
|
||||
* user-space-provided storage
|
||||
*/
|
||||
if (len > 0) {
|
||||
mptcp_diag_fill_info(msk, &mfi.mptcp_info);
|
||||
copylen += min_t(unsigned int, len, sizeof(struct mptcp_info));
|
||||
}
|
||||
|
||||
mfi.size_tcpinfo_kernel = sizeof(struct tcp_info);
|
||||
mfi.size_tcpinfo_user = min_t(unsigned int, mfi.size_tcpinfo_user,
|
||||
sizeof(struct tcp_info));
|
||||
sfinfoptr = u64_to_user_ptr(mfi.subflow_info);
|
||||
mfi.size_sfinfo_kernel = sizeof(struct mptcp_subflow_info);
|
||||
mfi.size_sfinfo_user = min_t(unsigned int, mfi.size_sfinfo_user,
|
||||
sizeof(struct mptcp_subflow_info));
|
||||
tcpinfoptr = u64_to_user_ptr(mfi.tcp_info);
|
||||
|
||||
lock_sock(sk);
|
||||
mptcp_for_each_subflow(msk, subflow) {
|
||||
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
|
||||
struct mptcp_subflow_info sfinfo;
|
||||
struct tcp_info tcp_info;
|
||||
|
||||
if (sfcount++ >= mfi.size_arrays_user)
|
||||
continue;
|
||||
|
||||
/* fetch addr/tcp_info only if the user space buffers
|
||||
* are wide enough
|
||||
*/
|
||||
memset(&sfinfo, 0, sizeof(sfinfo));
|
||||
sfinfo.id = subflow->subflow_id;
|
||||
if (mfi.size_sfinfo_user >
|
||||
offsetof(struct mptcp_subflow_info, addrs))
|
||||
mptcp_get_sub_addrs(ssk, &sfinfo.addrs);
|
||||
if (copy_to_user(sfinfoptr, &sfinfo, mfi.size_sfinfo_user))
|
||||
goto fail_release;
|
||||
|
||||
if (mfi.size_tcpinfo_user) {
|
||||
tcp_get_info(ssk, &tcp_info);
|
||||
if (copy_to_user(tcpinfoptr, &tcp_info,
|
||||
mfi.size_tcpinfo_user))
|
||||
goto fail_release;
|
||||
}
|
||||
|
||||
tcpinfoptr += mfi.size_tcpinfo_user;
|
||||
sfinfoptr += mfi.size_sfinfo_user;
|
||||
}
|
||||
release_sock(sk);
|
||||
|
||||
mfi.num_subflows = sfcount;
|
||||
if (mptcp_put_full_info(&mfi, optval, copylen, optlen))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
|
||||
fail_release:
|
||||
release_sock(sk);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
static int mptcp_put_int_option(struct mptcp_sock *msk, char __user *optval,
|
||||
int __user *optlen, int val)
|
||||
{
|
||||
|
@ -1220,6 +1356,8 @@ static int mptcp_getsockopt_sol_mptcp(struct mptcp_sock *msk, int optname,
|
|||
switch (optname) {
|
||||
case MPTCP_INFO:
|
||||
return mptcp_getsockopt_info(msk, optval, optlen);
|
||||
case MPTCP_FULL_INFO:
|
||||
return mptcp_getsockopt_full_info(msk, optval, optlen);
|
||||
case MPTCP_TCPINFO:
|
||||
return mptcp_getsockopt_tcpinfo(msk, optval, optlen);
|
||||
case MPTCP_SUBFLOW_ADDRS:
|
||||
|
|
|
@ -819,6 +819,7 @@ create_child:
|
|||
if (!ctx->conn)
|
||||
goto fallback;
|
||||
|
||||
ctx->subflow_id = 1;
|
||||
owner = mptcp_sk(ctx->conn);
|
||||
mptcp_pm_new_connection(owner, child, 1);
|
||||
|
||||
|
@ -1574,6 +1575,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
|
|||
subflow->remote_id = remote_id;
|
||||
subflow->request_join = 1;
|
||||
subflow->request_bkup = !!(flags & MPTCP_PM_ADDR_FLAG_BACKUP);
|
||||
subflow->subflow_id = msk->subflow_id++;
|
||||
mptcp_info2sockaddr(remote, &addr, ssk->sk_family);
|
||||
|
||||
sock_hold(ssk);
|
||||
|
|
|
@ -1683,12 +1683,12 @@ chk_add_tx_nr()
|
|||
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
|
||||
|
||||
printf "%-${nr_blank}s %s" " " "add TX"
|
||||
count=$(ip netns exec $ns1 nstat -as MPTcpExtAddAddrTx | grep MPTcpExtAddAddrTx | awk '{print $2}')
|
||||
[ -z "$count" ] && count=0
|
||||
|
||||
count=$(get_counter ${ns1} "MPTcpExtAddAddrTx")
|
||||
if [ -z "$count" ]; then
|
||||
echo -n "[skip]"
|
||||
# if the test configured a short timeout tolerate greater then expected
|
||||
# add addrs options, due to retransmissions
|
||||
if [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
|
||||
elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
|
||||
echo "[fail] got $count ADD_ADDR[s] TX, expected $add_tx_nr"
|
||||
fail_test
|
||||
else
|
||||
|
@ -1696,9 +1696,10 @@ chk_add_tx_nr()
|
|||
fi
|
||||
|
||||
echo -n " - echo TX "
|
||||
count=$(ip netns exec $ns2 nstat -as MPTcpExtEchoAddTx | grep MPTcpExtEchoAddTx | awk '{print $2}')
|
||||
[ -z "$count" ] && count=0
|
||||
if [ "$count" != "$echo_tx_nr" ]; then
|
||||
count=$(get_counter ${ns2} "MPTcpExtEchoAddTx")
|
||||
if [ -z "$count" ]; then
|
||||
echo "[skip]"
|
||||
elif [ "$count" != "$echo_tx_nr" ]; then
|
||||
echo "[fail] got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr"
|
||||
fail_test
|
||||
else
|
||||
|
@ -1734,9 +1735,10 @@ chk_rm_nr()
|
|||
fi
|
||||
|
||||
printf "%-${nr_blank}s %s" " " "rm "
|
||||
count=$(ip netns exec $addr_ns nstat -as MPTcpExtRmAddr | grep MPTcpExtRmAddr | awk '{print $2}')
|
||||
[ -z "$count" ] && count=0
|
||||
if [ "$count" != "$rm_addr_nr" ]; then
|
||||
count=$(get_counter ${addr_ns} "MPTcpExtRmAddr")
|
||||
if [ -z "$count" ]; then
|
||||
echo -n "[skip]"
|
||||
elif [ "$count" != "$rm_addr_nr" ]; then
|
||||
echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
|
||||
fail_test
|
||||
else
|
||||
|
@ -1778,16 +1780,15 @@ chk_rm_tx_nr()
|
|||
local rm_addr_tx_nr=$1
|
||||
|
||||
printf "%-${nr_blank}s %s" " " "rm TX "
|
||||
count=$(ip netns exec $ns2 nstat -as MPTcpExtRmAddrTx | grep MPTcpExtRmAddrTx | awk '{print $2}')
|
||||
[ -z "$count" ] && count=0
|
||||
if [ "$count" != "$rm_addr_tx_nr" ]; then
|
||||
count=$(get_counter ${ns2} "MPTcpExtRmAddrTx")
|
||||
if [ -z "$count" ]; then
|
||||
echo "[skip]"
|
||||
elif [ "$count" != "$rm_addr_tx_nr" ]; then
|
||||
echo "[fail] got $count RM_ADDR[s] expected $rm_addr_tx_nr"
|
||||
fail_test
|
||||
else
|
||||
echo -n "[ ok ]"
|
||||
echo "[ ok ]"
|
||||
fi
|
||||
|
||||
echo "$extra_msg"
|
||||
}
|
||||
|
||||
chk_prio_nr()
|
||||
|
|
|
@ -51,6 +51,11 @@ struct mptcp_info {
|
|||
__u8 mptcpi_local_addr_used;
|
||||
__u8 mptcpi_local_addr_max;
|
||||
__u8 mptcpi_csum_enabled;
|
||||
__u32 mptcpi_retransmits;
|
||||
__u64 mptcpi_bytes_retrans;
|
||||
__u64 mptcpi_bytes_sent;
|
||||
__u64 mptcpi_bytes_received;
|
||||
__u64 mptcpi_bytes_acked;
|
||||
};
|
||||
|
||||
struct mptcp_subflow_data {
|
||||
|
@ -81,10 +86,41 @@ struct mptcp_subflow_addrs {
|
|||
#define MPTCP_SUBFLOW_ADDRS 3
|
||||
#endif
|
||||
|
||||
#ifndef MPTCP_FULL_INFO
|
||||
struct mptcp_subflow_info {
|
||||
__u32 id;
|
||||
struct mptcp_subflow_addrs addrs;
|
||||
};
|
||||
|
||||
struct mptcp_full_info {
|
||||
__u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
|
||||
__u32 size_tcpinfo_user;
|
||||
__u32 size_sfinfo_kernel; /* must be 0, set by kernel */
|
||||
__u32 size_sfinfo_user;
|
||||
__u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
|
||||
__u32 size_arrays_user; /* max subflows that userspace is interested in;
|
||||
* the buffers at subflow_info/tcp_info
|
||||
* are respectively at least:
|
||||
* size_arrays * size_sfinfo_user
|
||||
* size_arrays * size_tcpinfo_user
|
||||
* bytes wide
|
||||
*/
|
||||
__aligned_u64 subflow_info;
|
||||
__aligned_u64 tcp_info;
|
||||
struct mptcp_info mptcp_info;
|
||||
};
|
||||
|
||||
#define MPTCP_FULL_INFO 4
|
||||
#endif
|
||||
|
||||
struct so_state {
|
||||
struct mptcp_info mi;
|
||||
struct mptcp_info last_sample;
|
||||
struct tcp_info tcp_info;
|
||||
struct mptcp_subflow_addrs addrs;
|
||||
uint64_t mptcpi_rcv_delta;
|
||||
uint64_t tcpi_rcv_delta;
|
||||
bool pkt_stats_avail;
|
||||
};
|
||||
|
||||
#ifndef MIN
|
||||
|
@ -322,8 +358,9 @@ static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w)
|
|||
if (ret < 0)
|
||||
die_perror("getsockopt MPTCP_INFO");
|
||||
|
||||
assert(olen == sizeof(i));
|
||||
s->pkt_stats_avail = olen >= sizeof(i);
|
||||
|
||||
s->last_sample = i;
|
||||
if (s->mi.mptcpi_write_seq == 0)
|
||||
s->mi = i;
|
||||
|
||||
|
@ -362,6 +399,8 @@ static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t
|
|||
olen -= sizeof(struct mptcp_subflow_data);
|
||||
assert(olen == ti.d.size_user);
|
||||
|
||||
s->tcp_info = ti.ti[0];
|
||||
|
||||
if (ti.ti[0].tcpi_bytes_sent == w &&
|
||||
ti.ti[0].tcpi_bytes_received == r)
|
||||
goto done;
|
||||
|
@ -383,7 +422,7 @@ done:
|
|||
do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO);
|
||||
}
|
||||
|
||||
static void do_getsockopt_subflow_addrs(int fd)
|
||||
static void do_getsockopt_subflow_addrs(struct so_state *s, int fd)
|
||||
{
|
||||
struct sockaddr_storage remote, local;
|
||||
socklen_t olen, rlen, llen;
|
||||
|
@ -431,6 +470,7 @@ static void do_getsockopt_subflow_addrs(int fd)
|
|||
|
||||
assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0);
|
||||
assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0);
|
||||
s->addrs = addrs.addr[0];
|
||||
|
||||
memset(&addrs, 0, sizeof(addrs));
|
||||
|
||||
|
@ -451,13 +491,70 @@ static void do_getsockopt_subflow_addrs(int fd)
|
|||
do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS);
|
||||
}
|
||||
|
||||
static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd)
|
||||
{
|
||||
size_t data_size = sizeof(struct mptcp_full_info);
|
||||
struct mptcp_subflow_info sfinfo[2];
|
||||
struct tcp_info tcp_info[2];
|
||||
struct mptcp_full_info mfi;
|
||||
socklen_t olen;
|
||||
int ret;
|
||||
|
||||
memset(&mfi, 0, data_size);
|
||||
memset(tcp_info, 0, sizeof(tcp_info));
|
||||
memset(sfinfo, 0, sizeof(sfinfo));
|
||||
|
||||
mfi.size_tcpinfo_user = sizeof(struct tcp_info);
|
||||
mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info);
|
||||
mfi.size_arrays_user = 2;
|
||||
mfi.subflow_info = (unsigned long)&sfinfo[0];
|
||||
mfi.tcp_info = (unsigned long)&tcp_info[0];
|
||||
olen = data_size;
|
||||
|
||||
ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen);
|
||||
if (ret < 0) {
|
||||
if (errno == EOPNOTSUPP) {
|
||||
perror("MPTCP_FULL_INFO test skipped");
|
||||
return;
|
||||
}
|
||||
xerror("getsockopt MPTCP_FULL_INFO");
|
||||
}
|
||||
|
||||
assert(olen <= data_size);
|
||||
assert(mfi.size_tcpinfo_kernel > 0);
|
||||
assert(mfi.size_tcpinfo_user ==
|
||||
MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info)));
|
||||
assert(mfi.size_sfinfo_kernel > 0);
|
||||
assert(mfi.size_sfinfo_user ==
|
||||
MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info)));
|
||||
assert(mfi.num_subflows == 1);
|
||||
|
||||
/* Tolerate future extension to mptcp_info struct and running newer
|
||||
* test on top of older kernel.
|
||||
* Anyway any kernel supporting MPTCP_FULL_INFO must at least include
|
||||
* the following in mptcp_info.
|
||||
*/
|
||||
assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info));
|
||||
assert(mfi.mptcp_info.mptcpi_subflows == 0);
|
||||
assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent);
|
||||
assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received);
|
||||
|
||||
assert(sfinfo[0].id == 1);
|
||||
assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent);
|
||||
assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received);
|
||||
assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs)));
|
||||
}
|
||||
|
||||
static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w)
|
||||
{
|
||||
do_getsockopt_mptcp_info(s, fd, w);
|
||||
|
||||
do_getsockopt_tcp_info(s, fd, r, w);
|
||||
|
||||
do_getsockopt_subflow_addrs(fd);
|
||||
do_getsockopt_subflow_addrs(s, fd);
|
||||
|
||||
if (r)
|
||||
do_getsockopt_mptcp_full_info(s, fd);
|
||||
}
|
||||
|
||||
static void connect_one_server(int fd, int pipefd)
|
||||
|
@ -562,6 +659,23 @@ static void process_one_client(int fd, int pipefd)
|
|||
do_getsockopts(&s, fd, ret, ret2);
|
||||
if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
|
||||
xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
|
||||
|
||||
/* be nice when running on top of older kernel */
|
||||
if (s.pkt_stats_avail) {
|
||||
if (s.last_sample.mptcpi_bytes_sent != ret2)
|
||||
xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
|
||||
s.last_sample.mptcpi_bytes_sent, ret2,
|
||||
s.last_sample.mptcpi_bytes_sent - ret2);
|
||||
if (s.last_sample.mptcpi_bytes_received != ret)
|
||||
xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
|
||||
s.last_sample.mptcpi_bytes_received, ret,
|
||||
s.last_sample.mptcpi_bytes_received - ret);
|
||||
if (s.last_sample.mptcpi_bytes_acked != ret)
|
||||
xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
|
||||
s.last_sample.mptcpi_bytes_acked, ret2,
|
||||
s.last_sample.mptcpi_bytes_acked - ret2);
|
||||
}
|
||||
|
||||
close(fd);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue