From 00fab6cf323fa5850e6cbe283b23e605e6e97912 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 28 May 2025 18:01:30 +0200 Subject: [PATCH 01/14] smb: smbdirect: add smbdirect_pdu.h with protocol definitions This is just a start moving into a common smbdirect layer. It will be used in the next commits... Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Hyunchul Lee Cc: Meetakshi Setiya Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_pdu.h | 55 +++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_pdu.h diff --git a/fs/smb/common/smbdirect/smbdirect_pdu.h b/fs/smb/common/smbdirect/smbdirect_pdu.h new file mode 100644 index 000000000000..ae9fdb05ce23 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_pdu.h @@ -0,0 +1,55 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2017 Stefan Metzmacher + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__ +#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__ + +#define SMBDIRECT_V1 0x0100 + +/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */ +struct smbdirect_negotiate_req { + __le16 min_version; + __le16 max_version; + __le16 reserved; + __le16 credits_requested; + __le32 preferred_send_size; + __le32 max_receive_size; + __le32 max_fragmented_size; +} __packed; + +/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */ +struct smbdirect_negotiate_resp { + __le16 min_version; + __le16 max_version; + __le16 negotiated_version; + __le16 reserved; + __le16 credits_requested; + __le16 credits_granted; + __le32 status; + __le32 max_readwrite_size; + __le32 preferred_send_size; + __le32 max_receive_size; + __le32 max_fragmented_size; +} __packed; + +#define SMBDIRECT_DATA_MIN_HDR_SIZE 0x14 +#define SMBDIRECT_DATA_OFFSET 0x18 + +#define SMBDIRECT_FLAG_RESPONSE_REQUESTED 0x0001 + +/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */ +struct smbdirect_data_transfer { + __le16 credits_requested; + __le16 credits_granted; + __le16 flags; + __le16 reserved; + __le32 remaining_data_length; + __le32 data_offset; + __le32 data_length; + __le32 padding; + __u8 buffer[]; +} __packed; + +#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_PDU_H__ */ From 64946d5be665ddac6b5bf11f5b5ff319aae0f4c6 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 28 May 2025 18:01:31 +0200 Subject: [PATCH 02/14] smb: client: make use of common smbdirect_pdu.h Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Hyunchul Lee Cc: Meetakshi Setiya Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.c | 40 ++++++++++++++++++-------------------- fs/smb/client/smbdirect.h | 41 --------------------------------------- 2 files changed, 19 insertions(+), 62 deletions(-) diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index b0b7254661e9..0b08169e885d 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -7,6 +7,7 @@ #include #include #include +#include "../common/smbdirect/smbdirect_pdu.h" #include "smbdirect.h" #include "cifs_debug.h" #include "cifsproto.h" @@ -50,9 +51,6 @@ struct smb_extract_to_rdma { static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, struct smb_extract_to_rdma *rdma); -/* SMBD version number */ -#define SMBD_V1 0x0100 - /* Port numbers for SMBD transport */ #define SMB_PORT 445 #define SMBD_PORT 5445 @@ -299,7 +297,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) mempool_free(request, request->info->request_mempool); } -static void dump_smbd_negotiate_resp(struct smbd_negotiate_resp *resp) +static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp) { log_rdma_event(INFO, "resp message min_version %u max_version %u negotiated_version %u credits_requested %u credits_granted %u status %u max_readwrite_size %u preferred_send_size %u max_receive_size %u max_fragmented_size %u\n", resp->min_version, resp->max_version, @@ -318,15 +316,15 @@ static bool process_negotiation_response( struct smbd_response *response, int packet_length) { struct smbd_connection *info = response->info; - struct smbd_negotiate_resp *packet = smbd_response_payload(response); + struct smbdirect_negotiate_resp *packet = smbd_response_payload(response); - if (packet_length < sizeof(struct smbd_negotiate_resp)) { + if (packet_length < sizeof(struct smbdirect_negotiate_resp)) { log_rdma_event(ERR, "error: packet_length=%d\n", packet_length); return false; } - if (le16_to_cpu(packet->negotiated_version) != SMBD_V1) { + if (le16_to_cpu(packet->negotiated_version) != SMBDIRECT_V1) { log_rdma_event(ERR, "error: negotiated_version=%x\n", le16_to_cpu(packet->negotiated_version)); return false; @@ -448,7 +446,7 @@ static void smbd_post_send_credits(struct work_struct *work) /* Called from softirq, when recv is done */ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smbd_data_transfer *data_transfer; + struct smbdirect_data_transfer *data_transfer; struct smbd_response *response = container_of(wc->wr_cqe, struct smbd_response, cqe); struct smbd_connection *info = response->info; @@ -474,7 +472,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) switch (response->type) { /* SMBD negotiation response */ case SMBD_NEGOTIATE_RESP: - dump_smbd_negotiate_resp(smbd_response_payload(response)); + dump_smbdirect_negotiate_resp(smbd_response_payload(response)); info->full_packet_received = true; info->negotiate_done = process_negotiation_response(response, wc->byte_len); @@ -531,7 +529,7 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) /* Send a KEEP_ALIVE response right away if requested */ info->keep_alive_requested = KEEP_ALIVE_NONE; if (le16_to_cpu(data_transfer->flags) & - SMB_DIRECT_RESPONSE_REQUESTED) { + SMBDIRECT_FLAG_RESPONSE_REQUESTED) { info->keep_alive_requested = KEEP_ALIVE_PENDING; } @@ -686,7 +684,7 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) struct ib_send_wr send_wr; int rc = -ENOMEM; struct smbd_request *request; - struct smbd_negotiate_req *packet; + struct smbdirect_negotiate_req *packet; request = mempool_alloc(info->request_mempool, GFP_KERNEL); if (!request) @@ -695,8 +693,8 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->info = info; packet = smbd_request_payload(request); - packet->min_version = cpu_to_le16(SMBD_V1); - packet->max_version = cpu_to_le16(SMBD_V1); + packet->min_version = cpu_to_le16(SMBDIRECT_V1); + packet->max_version = cpu_to_le16(SMBDIRECT_V1); packet->reserved = 0; packet->credits_requested = cpu_to_le16(info->send_credit_target); packet->preferred_send_size = cpu_to_le32(info->max_send_size); @@ -774,10 +772,10 @@ static int manage_credits_prior_sending(struct smbd_connection *info) /* * Check if we need to send a KEEP_ALIVE message * The idle connection timer triggers a KEEP_ALIVE message when expires - * SMB_DIRECT_RESPONSE_REQUESTED is set in the message flag to have peer send + * SMBDIRECT_FLAG_RESPONSE_REQUESTED is set in the message flag to have peer send * back a response. * return value: - * 1 if SMB_DIRECT_RESPONSE_REQUESTED needs to be set + * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set * 0: otherwise */ static int manage_keep_alive_before_sending(struct smbd_connection *info) @@ -837,7 +835,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, int header_length; int data_length; struct smbd_request *request; - struct smbd_data_transfer *packet; + struct smbdirect_data_transfer *packet; int new_credits = 0; wait_credit: @@ -919,7 +917,7 @@ wait_send_queue: packet->flags = 0; if (manage_keep_alive_before_sending(info)) - packet->flags |= cpu_to_le16(SMB_DIRECT_RESPONSE_REQUESTED); + packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); packet->reserved = 0; if (!data_length) @@ -938,10 +936,10 @@ wait_send_queue: le32_to_cpu(packet->remaining_data_length)); /* Map the packet to DMA */ - header_length = sizeof(struct smbd_data_transfer); + header_length = sizeof(struct smbdirect_data_transfer); /* If this is a packet without payload, don't send padding */ if (!data_length) - header_length = offsetof(struct smbd_data_transfer, padding); + header_length = offsetof(struct smbdirect_data_transfer, padding); request->sge[0].addr = ib_dma_map_single(info->id->device, (void *)packet, @@ -1432,7 +1430,7 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) kmem_cache_create( name, sizeof(struct smbd_request) + - sizeof(struct smbd_data_transfer), + sizeof(struct smbdirect_data_transfer), 0, SLAB_HWCACHE_ALIGN, NULL); if (!info->request_cache) return -ENOMEM; @@ -1735,7 +1733,7 @@ static int smbd_recv_buf(struct smbd_connection *info, char *buf, unsigned int size) { struct smbd_response *response; - struct smbd_data_transfer *data_transfer; + struct smbdirect_data_transfer *data_transfer; int to_copy, to_read, data_read, offset; u32 data_length, remaining_data_length, data_offset; int rc; diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index c08e3665150d..4da0974ce730 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -177,47 +177,6 @@ enum smbd_message_type { SMBD_TRANSFER_DATA, }; -#define SMB_DIRECT_RESPONSE_REQUESTED 0x0001 - -/* SMBD negotiation request packet [MS-SMBD] 2.2.1 */ -struct smbd_negotiate_req { - __le16 min_version; - __le16 max_version; - __le16 reserved; - __le16 credits_requested; - __le32 preferred_send_size; - __le32 max_receive_size; - __le32 max_fragmented_size; -} __packed; - -/* SMBD negotiation response packet [MS-SMBD] 2.2.2 */ -struct smbd_negotiate_resp { - __le16 min_version; - __le16 max_version; - __le16 negotiated_version; - __le16 reserved; - __le16 credits_requested; - __le16 credits_granted; - __le32 status; - __le32 max_readwrite_size; - __le32 preferred_send_size; - __le32 max_receive_size; - __le32 max_fragmented_size; -} __packed; - -/* SMBD data transfer packet with payload [MS-SMBD] 2.2.3 */ -struct smbd_data_transfer { - __le16 credits_requested; - __le16 credits_granted; - __le16 flags; - __le16 reserved; - __le32 remaining_data_length; - __le32 data_offset; - __le32 data_length; - __le32 padding; - __u8 buffer[]; -} __packed; - /* The packet fields for a registered RDMA buffer */ struct smbd_buffer_descriptor_v1 { __le64 offset; From 7e136a718633b2c54764e185f3bfccf0763fc1dd Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 28 May 2025 18:01:33 +0200 Subject: [PATCH 03/14] smb: smbdirect: add smbdirect.h with public structures Will be used in client and server in the next commits. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Hyunchul Lee CC: Meetakshi Setiya Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect.h diff --git a/fs/smb/common/smbdirect/smbdirect.h b/fs/smb/common/smbdirect/smbdirect.h new file mode 100644 index 000000000000..eedbdf0d0433 --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2017, Microsoft Corporation. + * Copyright (C) 2018, LG Electronics. + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ +#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ + +/* SMB-DIRECT buffer descriptor V1 structure [MS-SMBD] 2.2.3.1 */ +struct smbdirect_buffer_descriptor_v1 { + __le64 offset; + __le32 token; + __le32 length; +} __packed; + +#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ */ From 21604ed60864b9ace2e28a1c86411f388f03f94e Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 28 May 2025 18:01:34 +0200 Subject: [PATCH 04/14] smb: client: make use of common smbdirect.h Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Hyunchul Lee Cc: Meetakshi Setiya Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smb2pdu.c | 17 +++++++++-------- fs/smb/client/smbdirect.h | 7 ------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 0c320d06809c..2bd814cd612b 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -36,6 +36,7 @@ #include "smb2glob.h" #include "cifspdu.h" #include "cifs_spnego.h" +#include "../common/smbdirect/smbdirect.h" #include "smbdirect.h" #include "trace.h" #ifdef CONFIG_CIFS_DFS_UPCALL @@ -4449,10 +4450,10 @@ smb2_new_read_req(void **buf, unsigned int *total_len, #ifdef CONFIG_CIFS_SMB_DIRECT /* * If we want to do a RDMA write, fill in and append - * smbd_buffer_descriptor_v1 to the end of read request + * smbdirect_buffer_descriptor_v1 to the end of read request */ if (rdata && smb3_use_rdma_offload(io_parms)) { - struct smbd_buffer_descriptor_v1 *v1; + struct smbdirect_buffer_descriptor_v1 *v1; bool need_invalidate = server->dialect == SMB30_PROT_ID; rdata->mr = smbd_register_mr(server->smbd_conn, &rdata->subreq.io_iter, @@ -4466,8 +4467,8 @@ smb2_new_read_req(void **buf, unsigned int *total_len, req->ReadChannelInfoOffset = cpu_to_le16(offsetof(struct smb2_read_req, Buffer)); req->ReadChannelInfoLength = - cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1)); - v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; + cpu_to_le16(sizeof(struct smbdirect_buffer_descriptor_v1)); + v1 = (struct smbdirect_buffer_descriptor_v1 *) &req->Buffer[0]; v1->offset = cpu_to_le64(rdata->mr->mr->iova); v1->token = cpu_to_le32(rdata->mr->mr->rkey); v1->length = cpu_to_le32(rdata->mr->mr->length); @@ -4975,10 +4976,10 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) #ifdef CONFIG_CIFS_SMB_DIRECT /* * If we want to do a server RDMA read, fill in and append - * smbd_buffer_descriptor_v1 to the end of write request + * smbdirect_buffer_descriptor_v1 to the end of write request */ if (smb3_use_rdma_offload(io_parms)) { - struct smbd_buffer_descriptor_v1 *v1; + struct smbdirect_buffer_descriptor_v1 *v1; bool need_invalidate = server->dialect == SMB30_PROT_ID; wdata->mr = smbd_register_mr(server->smbd_conn, &wdata->subreq.io_iter, @@ -4997,8 +4998,8 @@ smb2_async_writev(struct cifs_io_subrequest *wdata) req->WriteChannelInfoOffset = cpu_to_le16(offsetof(struct smb2_write_req, Buffer)); req->WriteChannelInfoLength = - cpu_to_le16(sizeof(struct smbd_buffer_descriptor_v1)); - v1 = (struct smbd_buffer_descriptor_v1 *) &req->Buffer[0]; + cpu_to_le16(sizeof(struct smbdirect_buffer_descriptor_v1)); + v1 = (struct smbdirect_buffer_descriptor_v1 *) &req->Buffer[0]; v1->offset = cpu_to_le64(wdata->mr->mr->iova); v1->token = cpu_to_le32(wdata->mr->mr->rkey); v1->length = cpu_to_le32(wdata->mr->mr->length); diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 4da0974ce730..8561e19a23a3 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -177,13 +177,6 @@ enum smbd_message_type { SMBD_TRANSFER_DATA, }; -/* The packet fields for a registered RDMA buffer */ -struct smbd_buffer_descriptor_v1 { - __le64 offset; - __le32 token; - __le32 length; -} __packed; - /* Maximum number of SGEs used by smbdirect.c in any send work request */ #define SMBDIRECT_MAX_SEND_SGE 6 From 22234e37d7e97652cb53133009da5e14793d3c10 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 28 May 2025 18:01:36 +0200 Subject: [PATCH 05/14] smb: smbdirect: add smbdirect_socket.h This abstracts the common smbdirect layer. Currently with just a few things in it, but that will change over time until everything is in common. Will be used in client and server in the next commits Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Hyunchul Lee Cc: Meetakshi Setiya Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/common/smbdirect/smbdirect_socket.h | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 fs/smb/common/smbdirect/smbdirect_socket.h diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h new file mode 100644 index 000000000000..69a55561f91a --- /dev/null +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2025 Stefan Metzmacher + */ + +#ifndef __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ +#define __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ + +enum smbdirect_socket_status { + SMBDIRECT_SOCKET_CREATED, + SMBDIRECT_SOCKET_CONNECTING, + SMBDIRECT_SOCKET_CONNECTED, + SMBDIRECT_SOCKET_NEGOTIATE_FAILED, + SMBDIRECT_SOCKET_DISCONNECTING, + SMBDIRECT_SOCKET_DISCONNECTED, + SMBDIRECT_SOCKET_DESTROYED +}; + +struct smbdirect_socket { + enum smbdirect_socket_status status; + + /* RDMA related */ + struct { + struct rdma_cm_id *cm_id; + } rdma; + + /* IB verbs related */ + struct { + struct ib_pd *pd; + struct ib_cq *send_cq; + struct ib_cq *recv_cq; + + /* + * shortcuts for rdma.cm_id->{qp,device}; + */ + struct ib_qp *qp; + struct ib_device *dev; + } ib; +}; + +#endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ From c3011b9a7deaaaabdf955815d29eac39c8b75e67 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 28 May 2025 18:01:37 +0200 Subject: [PATCH 06/14] smb: client: make use of common smbdirect_socket This is the next step in the direction of a common smbdirect layer. Currently only structures are shared, but that will change over time until everything is shared. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Hyunchul Lee Cc: Meetakshi Setiya Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 2 +- fs/smb/client/smbdirect.c | 258 ++++++++++++++++++++----------------- fs/smb/client/smbdirect.h | 10 +- 3 files changed, 145 insertions(+), 125 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index e03c890de0a0..56b0b5c82dd1 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -387,7 +387,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\nSMBDirect (in hex) protocol version: %x " "transport status: %x", server->smbd_conn->protocol, - server->smbd_conn->transport_status); + server->smbd_conn->socket.status); seq_printf(m, "\nConn receive_credit_max: %x " "send_credit_target: %x max_send_size: %x", server->smbd_conn->receive_credit_max, diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 0b08169e885d..608ecd2149ed 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -163,10 +163,11 @@ static void smbd_disconnect_rdma_work(struct work_struct *work) { struct smbd_connection *info = container_of(work, struct smbd_connection, disconnect_work); + struct smbdirect_socket *sc = &info->socket; - if (info->transport_status == SMBD_CONNECTED) { - info->transport_status = SMBD_DISCONNECTING; - rdma_disconnect(info->id); + if (sc->status == SMBDIRECT_SOCKET_CONNECTED) { + sc->status = SMBDIRECT_SOCKET_DISCONNECTING; + rdma_disconnect(sc->rdma.cm_id); } } @@ -180,6 +181,7 @@ static int smbd_conn_upcall( struct rdma_cm_id *id, struct rdma_cm_event *event) { struct smbd_connection *info = id->context; + struct smbdirect_socket *sc = &info->socket; log_rdma_event(INFO, "event=%d status=%d\n", event->event, event->status); @@ -203,7 +205,7 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_ESTABLISHED: log_rdma_event(INFO, "connected event=%d\n", event->event); - info->transport_status = SMBD_CONNECTED; + sc->status = SMBDIRECT_SOCKET_CONNECTED; wake_up_interruptible(&info->conn_wait); break; @@ -211,20 +213,20 @@ static int smbd_conn_upcall( case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: log_rdma_event(INFO, "connecting failed event=%d\n", event->event); - info->transport_status = SMBD_DISCONNECTED; + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up_interruptible(&info->conn_wait); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: case RDMA_CM_EVENT_DISCONNECTED: /* This happens when we fail the negotiation */ - if (info->transport_status == SMBD_NEGOTIATE_FAILED) { - info->transport_status = SMBD_DISCONNECTED; + if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up(&info->conn_wait); break; } - info->transport_status = SMBD_DISCONNECTED; + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; wake_up_interruptible(&info->disconn_wait); wake_up_interruptible(&info->wait_reassembly_queue); wake_up_interruptible_all(&info->wait_send_queue); @@ -273,6 +275,8 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) int i; struct smbd_request *request = container_of(wc->wr_cqe, struct smbd_request, cqe); + struct smbd_connection *info = request->info; + struct smbdirect_socket *sc = &info->socket; log_rdma_send(INFO, "smbd_request 0x%p completed wc->status=%d\n", request, wc->status); @@ -284,7 +288,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) } for (i = 0; i < request->num_sge; i++) - ib_dma_unmap_single(request->info->id->device, + ib_dma_unmap_single(sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); @@ -391,8 +395,9 @@ static void smbd_post_send_credits(struct work_struct *work) struct smbd_connection *info = container_of(work, struct smbd_connection, post_send_credits_work); + struct smbdirect_socket *sc = &info->socket; - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { wake_up(&info->wait_receive_queues); return; } @@ -633,32 +638,34 @@ static int smbd_ia_open( struct smbd_connection *info, struct sockaddr *dstaddr, int port) { + struct smbdirect_socket *sc = &info->socket; int rc; - info->id = smbd_create_id(info, dstaddr, port); - if (IS_ERR(info->id)) { - rc = PTR_ERR(info->id); + sc->rdma.cm_id = smbd_create_id(info, dstaddr, port); + if (IS_ERR(sc->rdma.cm_id)) { + rc = PTR_ERR(sc->rdma.cm_id); goto out1; } + sc->ib.dev = sc->rdma.cm_id->device; - if (!frwr_is_supported(&info->id->device->attrs)) { + if (!frwr_is_supported(&sc->ib.dev->attrs)) { log_rdma_event(ERR, "Fast Registration Work Requests (FRWR) is not supported\n"); log_rdma_event(ERR, "Device capability flags = %llx max_fast_reg_page_list_len = %u\n", - info->id->device->attrs.device_cap_flags, - info->id->device->attrs.max_fast_reg_page_list_len); + sc->ib.dev->attrs.device_cap_flags, + sc->ib.dev->attrs.max_fast_reg_page_list_len); rc = -EPROTONOSUPPORT; goto out2; } info->max_frmr_depth = min_t(int, smbd_max_frmr_depth, - info->id->device->attrs.max_fast_reg_page_list_len); + sc->ib.dev->attrs.max_fast_reg_page_list_len); info->mr_type = IB_MR_TYPE_MEM_REG; - if (info->id->device->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) + if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) info->mr_type = IB_MR_TYPE_SG_GAPS; - info->pd = ib_alloc_pd(info->id->device, 0); - if (IS_ERR(info->pd)) { - rc = PTR_ERR(info->pd); + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); + if (IS_ERR(sc->ib.pd)) { + rc = PTR_ERR(sc->ib.pd); log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); goto out2; } @@ -666,8 +673,8 @@ static int smbd_ia_open( return 0; out2: - rdma_destroy_id(info->id); - info->id = NULL; + rdma_destroy_id(sc->rdma.cm_id); + sc->rdma.cm_id = NULL; out1: return rc; @@ -681,6 +688,7 @@ out1: */ static int smbd_post_send_negotiate_req(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct ib_send_wr send_wr; int rc = -ENOMEM; struct smbd_request *request; @@ -704,18 +712,18 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->num_sge = 1; request->sge[0].addr = ib_dma_map_single( - info->id->device, (void *)packet, + sc->ib.dev, (void *)packet, sizeof(*packet), DMA_TO_DEVICE); - if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { + if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { rc = -EIO; goto dma_mapping_failed; } request->sge[0].length = sizeof(*packet); - request->sge[0].lkey = info->pd->local_dma_lkey; + request->sge[0].lkey = sc->ib.pd->local_dma_lkey; ib_dma_sync_single_for_device( - info->id->device, request->sge[0].addr, + sc->ib.dev, request->sge[0].addr, request->sge[0].length, DMA_TO_DEVICE); request->cqe.done = send_done; @@ -732,14 +740,14 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->sge[0].length, request->sge[0].lkey); atomic_inc(&info->send_pending); - rc = ib_post_send(info->id->qp, &send_wr, NULL); + rc = ib_post_send(sc->ib.qp, &send_wr, NULL); if (!rc) return 0; /* if we reach here, post send failed */ log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); atomic_dec(&info->send_pending); - ib_dma_unmap_single(info->id->device, request->sge[0].addr, + ib_dma_unmap_single(sc->ib.dev, request->sge[0].addr, request->sge[0].length, DMA_TO_DEVICE); smbd_disconnect_rdma_connection(info); @@ -791,6 +799,7 @@ static int manage_keep_alive_before_sending(struct smbd_connection *info) static int smbd_post_send(struct smbd_connection *info, struct smbd_request *request) { + struct smbdirect_socket *sc = &info->socket; struct ib_send_wr send_wr; int rc, i; @@ -799,7 +808,7 @@ static int smbd_post_send(struct smbd_connection *info, "rdma_request sge[%d] addr=0x%llx length=%u\n", i, request->sge[i].addr, request->sge[i].length); ib_dma_sync_single_for_device( - info->id->device, + sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); @@ -814,7 +823,7 @@ static int smbd_post_send(struct smbd_connection *info, send_wr.opcode = IB_WR_SEND; send_wr.send_flags = IB_SEND_SIGNALED; - rc = ib_post_send(info->id->qp, &send_wr, NULL); + rc = ib_post_send(sc->ib.qp, &send_wr, NULL); if (rc) { log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); smbd_disconnect_rdma_connection(info); @@ -831,6 +840,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, struct iov_iter *iter, int *_remaining_data_length) { + struct smbdirect_socket *sc = &info->socket; int i, rc; int header_length; int data_length; @@ -842,11 +852,11 @@ wait_credit: /* Wait for send credits. A SMBD packet needs one credit */ rc = wait_event_interruptible(info->wait_send_queue, atomic_read(&info->send_credits) > 0 || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) goto err_wait_credit; - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_outgoing(ERR, "disconnected not sending on wait_credit\n"); rc = -EAGAIN; goto err_wait_credit; @@ -859,9 +869,9 @@ wait_credit: wait_send_queue: wait_event(info->wait_post_send, atomic_read(&info->send_pending) < info->send_credit_target || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_outgoing(ERR, "disconnected not sending on wait_send_queue\n"); rc = -EAGAIN; goto err_wait_send_queue; @@ -888,8 +898,8 @@ wait_send_queue: .nr_sge = 1, .max_sge = SMBDIRECT_MAX_SEND_SGE, .sge = request->sge, - .device = info->id->device, - .local_dma_lkey = info->pd->local_dma_lkey, + .device = sc->ib.dev, + .local_dma_lkey = sc->ib.pd->local_dma_lkey, .direction = DMA_TO_DEVICE, }; @@ -941,18 +951,18 @@ wait_send_queue: if (!data_length) header_length = offsetof(struct smbdirect_data_transfer, padding); - request->sge[0].addr = ib_dma_map_single(info->id->device, + request->sge[0].addr = ib_dma_map_single(sc->ib.dev, (void *)packet, header_length, DMA_TO_DEVICE); - if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) { + if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { rc = -EIO; request->sge[0].addr = 0; goto err_dma; } request->sge[0].length = header_length; - request->sge[0].lkey = info->pd->local_dma_lkey; + request->sge[0].lkey = sc->ib.pd->local_dma_lkey; rc = smbd_post_send(info, request); if (!rc) @@ -961,7 +971,7 @@ wait_send_queue: err_dma: for (i = 0; i < request->num_sge; i++) if (request->sge[i].addr) - ib_dma_unmap_single(info->id->device, + ib_dma_unmap_single(sc->ib.dev, request->sge[i].addr, request->sge[i].length, DMA_TO_DEVICE); @@ -1006,17 +1016,18 @@ static int smbd_post_send_empty(struct smbd_connection *info) static int smbd_post_recv( struct smbd_connection *info, struct smbd_response *response) { + struct smbdirect_socket *sc = &info->socket; struct ib_recv_wr recv_wr; int rc = -EIO; response->sge.addr = ib_dma_map_single( - info->id->device, response->packet, + sc->ib.dev, response->packet, info->max_receive_size, DMA_FROM_DEVICE); - if (ib_dma_mapping_error(info->id->device, response->sge.addr)) + if (ib_dma_mapping_error(sc->ib.dev, response->sge.addr)) return rc; response->sge.length = info->max_receive_size; - response->sge.lkey = info->pd->local_dma_lkey; + response->sge.lkey = sc->ib.pd->local_dma_lkey; response->cqe.done = recv_done; @@ -1025,9 +1036,9 @@ static int smbd_post_recv( recv_wr.sg_list = &response->sge; recv_wr.num_sge = 1; - rc = ib_post_recv(info->id->qp, &recv_wr, NULL); + rc = ib_post_recv(sc->ib.qp, &recv_wr, NULL); if (rc) { - ib_dma_unmap_single(info->id->device, response->sge.addr, + ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); smbd_disconnect_rdma_connection(info); log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); @@ -1185,9 +1196,10 @@ static struct smbd_response *get_receive_buffer(struct smbd_connection *info) static void put_receive_buffer( struct smbd_connection *info, struct smbd_response *response) { + struct smbdirect_socket *sc = &info->socket; unsigned long flags; - ib_dma_unmap_single(info->id->device, response->sge.addr, + ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); spin_lock_irqsave(&info->receive_queue_lock, flags); @@ -1287,6 +1299,7 @@ static void idle_connection_timer(struct work_struct *work) void smbd_destroy(struct TCP_Server_Info *server) { struct smbd_connection *info = server->smbd_conn; + struct smbdirect_socket *sc; struct smbd_response *response; unsigned long flags; @@ -1294,19 +1307,21 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "rdma session already destroyed\n"); return; } + sc = &info->socket; log_rdma_event(INFO, "destroying rdma session\n"); - if (info->transport_status != SMBD_DISCONNECTED) { - rdma_disconnect(server->smbd_conn->id); + if (sc->status != SMBDIRECT_SOCKET_DISCONNECTED) { + rdma_disconnect(sc->rdma.cm_id); log_rdma_event(INFO, "wait for transport being disconnected\n"); wait_event_interruptible( info->disconn_wait, - info->transport_status == SMBD_DISCONNECTED); + sc->status == SMBDIRECT_SOCKET_DISCONNECTED); } log_rdma_event(INFO, "destroying qp\n"); - ib_drain_qp(info->id->qp); - rdma_destroy_qp(info->id); + ib_drain_qp(sc->ib.qp); + rdma_destroy_qp(sc->rdma.cm_id); + sc->ib.qp = NULL; log_rdma_event(INFO, "cancelling idle timer\n"); cancel_delayed_work_sync(&info->idle_timer_work); @@ -1353,10 +1368,10 @@ void smbd_destroy(struct TCP_Server_Info *server) } destroy_mr_list(info); - ib_free_cq(info->send_cq); - ib_free_cq(info->recv_cq); - ib_dealloc_pd(info->pd); - rdma_destroy_id(info->id); + ib_free_cq(sc->ib.send_cq); + ib_free_cq(sc->ib.recv_cq); + ib_dealloc_pd(sc->ib.pd); + rdma_destroy_id(sc->rdma.cm_id); /* free mempools */ mempool_destroy(info->request_mempool); @@ -1365,7 +1380,7 @@ void smbd_destroy(struct TCP_Server_Info *server) mempool_destroy(info->response_mempool); kmem_cache_destroy(info->response_cache); - info->transport_status = SMBD_DESTROYED; + sc->status = SMBDIRECT_SOCKET_DESTROYED; destroy_workqueue(info->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); @@ -1390,7 +1405,7 @@ int smbd_reconnect(struct TCP_Server_Info *server) * This is possible if transport is disconnected and we haven't received * notification from RDMA, but upper layer has detected timeout */ - if (server->smbd_conn->transport_status == SMBD_CONNECTED) { + if (server->smbd_conn->socket.status == SMBDIRECT_SOCKET_CONNECTED) { log_rdma_event(INFO, "disconnecting transport\n"); smbd_destroy(server); } @@ -1489,6 +1504,7 @@ static struct smbd_connection *_smbd_get_connection( { int rc; struct smbd_connection *info; + struct smbdirect_socket *sc; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; @@ -1498,29 +1514,30 @@ static struct smbd_connection *_smbd_get_connection( info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); if (!info) return NULL; + sc = &info->socket; - info->transport_status = SMBD_CONNECTING; + sc->status = SMBDIRECT_SOCKET_CONNECTING; rc = smbd_ia_open(info, dstaddr, port); if (rc) { log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); goto create_id_failed; } - if (smbd_send_credit_target > info->id->device->attrs.max_cqe || - smbd_send_credit_target > info->id->device->attrs.max_qp_wr) { + if (smbd_send_credit_target > sc->ib.dev->attrs.max_cqe || + smbd_send_credit_target > sc->ib.dev->attrs.max_qp_wr) { log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", smbd_send_credit_target, - info->id->device->attrs.max_cqe, - info->id->device->attrs.max_qp_wr); + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); goto config_failed; } - if (smbd_receive_credit_max > info->id->device->attrs.max_cqe || - smbd_receive_credit_max > info->id->device->attrs.max_qp_wr) { + if (smbd_receive_credit_max > sc->ib.dev->attrs.max_cqe || + smbd_receive_credit_max > sc->ib.dev->attrs.max_qp_wr) { log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", smbd_receive_credit_max, - info->id->device->attrs.max_cqe, - info->id->device->attrs.max_qp_wr); + sc->ib.dev->attrs.max_cqe, + sc->ib.dev->attrs.max_qp_wr); goto config_failed; } @@ -1531,32 +1548,30 @@ static struct smbd_connection *_smbd_get_connection( info->max_receive_size = smbd_max_receive_size; info->keep_alive_interval = smbd_keep_alive_interval; - if (info->id->device->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || - info->id->device->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { + if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || + sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { log_rdma_event(ERR, "device %.*s max_send_sge/max_recv_sge = %d/%d too small\n", IB_DEVICE_NAME_MAX, - info->id->device->name, - info->id->device->attrs.max_send_sge, - info->id->device->attrs.max_recv_sge); + sc->ib.dev->name, + sc->ib.dev->attrs.max_send_sge, + sc->ib.dev->attrs.max_recv_sge); goto config_failed; } - info->send_cq = NULL; - info->recv_cq = NULL; - info->send_cq = - ib_alloc_cq_any(info->id->device, info, + sc->ib.send_cq = + ib_alloc_cq_any(sc->ib.dev, info, info->send_credit_target, IB_POLL_SOFTIRQ); - if (IS_ERR(info->send_cq)) { - info->send_cq = NULL; + if (IS_ERR(sc->ib.send_cq)) { + sc->ib.send_cq = NULL; goto alloc_cq_failed; } - info->recv_cq = - ib_alloc_cq_any(info->id->device, info, + sc->ib.recv_cq = + ib_alloc_cq_any(sc->ib.dev, info, info->receive_credit_max, IB_POLL_SOFTIRQ); - if (IS_ERR(info->recv_cq)) { - info->recv_cq = NULL; + if (IS_ERR(sc->ib.recv_cq)) { + sc->ib.recv_cq = NULL; goto alloc_cq_failed; } @@ -1570,29 +1585,30 @@ static struct smbd_connection *_smbd_get_connection( qp_attr.cap.max_inline_data = 0; qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR; qp_attr.qp_type = IB_QPT_RC; - qp_attr.send_cq = info->send_cq; - qp_attr.recv_cq = info->recv_cq; + qp_attr.send_cq = sc->ib.send_cq; + qp_attr.recv_cq = sc->ib.recv_cq; qp_attr.port_num = ~0; - rc = rdma_create_qp(info->id, info->pd, &qp_attr); + rc = rdma_create_qp(sc->rdma.cm_id, sc->ib.pd, &qp_attr); if (rc) { log_rdma_event(ERR, "rdma_create_qp failed %i\n", rc); goto create_qp_failed; } + sc->ib.qp = sc->rdma.cm_id->qp; memset(&conn_param, 0, sizeof(conn_param)); conn_param.initiator_depth = 0; conn_param.responder_resources = - min(info->id->device->attrs.max_qp_rd_atom, + min(sc->ib.dev->attrs.max_qp_rd_atom, SMBD_CM_RESPONDER_RESOURCES); info->responder_resources = conn_param.responder_resources; log_rdma_mr(INFO, "responder_resources=%d\n", info->responder_resources); /* Need to send IRD/ORD in private data for iWARP */ - info->id->device->ops.get_port_immutable( - info->id->device, info->id->port_num, &port_immutable); + sc->ib.dev->ops.get_port_immutable( + sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable); if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { ird_ord_hdr[0] = info->responder_resources; ird_ord_hdr[1] = 1; @@ -1613,16 +1629,16 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->conn_wait); init_waitqueue_head(&info->disconn_wait); init_waitqueue_head(&info->wait_reassembly_queue); - rc = rdma_connect(info->id, &conn_param); + rc = rdma_connect(sc->rdma.cm_id, &conn_param); if (rc) { log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); goto rdma_connect_failed; } wait_event_interruptible( - info->conn_wait, info->transport_status != SMBD_CONNECTING); + info->conn_wait, sc->status != SMBDIRECT_SOCKET_CONNECTING); - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); goto rdma_connect_failed; } @@ -1673,26 +1689,26 @@ allocate_mr_failed: negotiation_failed: cancel_delayed_work_sync(&info->idle_timer_work); destroy_caches_and_workqueue(info); - info->transport_status = SMBD_NEGOTIATE_FAILED; + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; init_waitqueue_head(&info->conn_wait); - rdma_disconnect(info->id); + rdma_disconnect(sc->rdma.cm_id); wait_event(info->conn_wait, - info->transport_status == SMBD_DISCONNECTED); + sc->status == SMBDIRECT_SOCKET_DISCONNECTED); allocate_cache_failed: rdma_connect_failed: - rdma_destroy_qp(info->id); + rdma_destroy_qp(sc->rdma.cm_id); create_qp_failed: alloc_cq_failed: - if (info->send_cq) - ib_free_cq(info->send_cq); - if (info->recv_cq) - ib_free_cq(info->recv_cq); + if (sc->ib.send_cq) + ib_free_cq(sc->ib.send_cq); + if (sc->ib.recv_cq) + ib_free_cq(sc->ib.recv_cq); config_failed: - ib_dealloc_pd(info->pd); - rdma_destroy_id(info->id); + ib_dealloc_pd(sc->ib.pd); + rdma_destroy_id(sc->rdma.cm_id); create_id_failed: kfree(info); @@ -1732,6 +1748,7 @@ try_again: static int smbd_recv_buf(struct smbd_connection *info, char *buf, unsigned int size) { + struct smbdirect_socket *sc = &info->socket; struct smbd_response *response; struct smbdirect_data_transfer *data_transfer; int to_copy, to_read, data_read, offset; @@ -1846,12 +1863,12 @@ read_rfc1002_done: rc = wait_event_interruptible( info->wait_reassembly_queue, info->reassembly_data_length >= size || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); /* Don't return any data if interrupted */ if (rc) return rc; - if (info->transport_status != SMBD_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { log_read(ERR, "disconnected\n"); return -ECONNABORTED; } @@ -1869,6 +1886,7 @@ static int smbd_recv_page(struct smbd_connection *info, struct page *page, unsigned int page_offset, unsigned int to_read) { + struct smbdirect_socket *sc = &info->socket; int ret; char *to_address; void *page_address; @@ -1877,7 +1895,7 @@ static int smbd_recv_page(struct smbd_connection *info, ret = wait_event_interruptible( info->wait_reassembly_queue, info->reassembly_data_length >= to_read || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); if (ret) return ret; @@ -1952,12 +1970,13 @@ int smbd_send(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst_array) { struct smbd_connection *info = server->smbd_conn; + struct smbdirect_socket *sc = &info->socket; struct smb_rqst *rqst; struct iov_iter iter; unsigned int remaining_data_length, klen; int rc, i, rqst_idx; - if (info->transport_status != SMBD_CONNECTED) + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) return -EAGAIN; /* @@ -2051,6 +2070,7 @@ static void smbd_mr_recovery_work(struct work_struct *work) { struct smbd_connection *info = container_of(work, struct smbd_connection, mr_recovery_work); + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *smbdirect_mr; int rc; @@ -2068,7 +2088,7 @@ static void smbd_mr_recovery_work(struct work_struct *work) } smbdirect_mr->mr = ib_alloc_mr( - info->pd, info->mr_type, + sc->ib.pd, info->mr_type, info->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", @@ -2097,12 +2117,13 @@ static void smbd_mr_recovery_work(struct work_struct *work) static void destroy_mr_list(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *mr, *tmp; cancel_work_sync(&info->mr_recovery_work); list_for_each_entry_safe(mr, tmp, &info->mr_list, list) { if (mr->state == MR_INVALIDATED) - ib_dma_unmap_sg(info->id->device, mr->sgt.sgl, + ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); ib_dereg_mr(mr->mr); kfree(mr->sgt.sgl); @@ -2119,6 +2140,7 @@ static void destroy_mr_list(struct smbd_connection *info) */ static int allocate_mr_list(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; int i; struct smbd_mr *smbdirect_mr, *tmp; @@ -2134,7 +2156,7 @@ static int allocate_mr_list(struct smbd_connection *info) smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); if (!smbdirect_mr) goto cleanup_entries; - smbdirect_mr->mr = ib_alloc_mr(info->pd, info->mr_type, + smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, info->mr_type, info->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", @@ -2179,20 +2201,20 @@ cleanup_entries: */ static struct smbd_mr *get_mr(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *ret; int rc; again: rc = wait_event_interruptible(info->wait_mr, atomic_read(&info->mr_ready_count) || - info->transport_status != SMBD_CONNECTED); + sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) { log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); return NULL; } - if (info->transport_status != SMBD_CONNECTED) { - log_rdma_mr(ERR, "info->transport_status=%x\n", - info->transport_status); + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + log_rdma_mr(ERR, "sc->status=%x\n", sc->status); return NULL; } @@ -2245,6 +2267,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate) { + struct smbdirect_socket *sc = &info->socket; struct smbd_mr *smbdirect_mr; int rc, num_pages; enum dma_data_direction dir; @@ -2274,7 +2297,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, num_pages, iov_iter_count(iter), info->max_frmr_depth); smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth); - rc = ib_dma_map_sg(info->id->device, smbdirect_mr->sgt.sgl, + rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, dir); if (!rc) { log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n", @@ -2310,7 +2333,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, * on IB_WR_REG_MR. Hardware enforces a barrier and order of execution * on the next ib_post_send when we actually send I/O to remote peer */ - rc = ib_post_send(info->id->qp, ®_wr->wr, NULL); + rc = ib_post_send(sc->ib.qp, ®_wr->wr, NULL); if (!rc) return smbdirect_mr; @@ -2319,7 +2342,7 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, /* If all failed, attempt to recover this MR by setting it MR_ERROR*/ map_mr_error: - ib_dma_unmap_sg(info->id->device, smbdirect_mr->sgt.sgl, + ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, smbdirect_mr->dir); dma_map_error: @@ -2357,6 +2380,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) { struct ib_send_wr *wr; struct smbd_connection *info = smbdirect_mr->conn; + struct smbdirect_socket *sc = &info->socket; int rc = 0; if (smbdirect_mr->need_invalidate) { @@ -2370,7 +2394,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) wr->send_flags = IB_SEND_SIGNALED; init_completion(&smbdirect_mr->invalidate_done); - rc = ib_post_send(info->id->qp, wr, NULL); + rc = ib_post_send(sc->ib.qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); smbd_disconnect_rdma_connection(info); @@ -2387,7 +2411,7 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) if (smbdirect_mr->state == MR_INVALIDATED) { ib_dma_unmap_sg( - info->id->device, smbdirect_mr->sgt.sgl, + sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, smbdirect_mr->dir); smbdirect_mr->state = MR_READY; diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 8561e19a23a3..904c4e5b9e5c 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -15,6 +15,8 @@ #include #include +#include "../common/smbdirect/smbdirect_socket.h" + extern int rdma_readwrite_threshold; extern int smbd_max_frmr_depth; extern int smbd_keep_alive_interval; @@ -50,14 +52,8 @@ enum smbd_connection_status { * 5. mempools for allocating packets */ struct smbd_connection { - enum smbd_connection_status transport_status; + struct smbdirect_socket socket; - /* RDMA related */ - struct rdma_cm_id *id; - struct ib_qp_init_attr qp_attr; - struct ib_pd *pd; - struct ib_cq *send_cq, *recv_cq; - struct ib_device_attr dev_attr; int ri_rc; struct completion ri_done; wait_queue_head_t conn_wait; From dce8047f4725d4469c0813ff50c4115fc2d0b628 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 28 May 2025 18:01:39 +0200 Subject: [PATCH 07/14] smb: smbdirect: introduce smbdirect_socket_parameters This is the next step in the direction of a common smbdirect layer. Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Hyunchul Lee Cc: Meetakshi Setiya Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/smbdirect.h | 1 + fs/smb/common/smbdirect/smbdirect.h | 20 ++++++++++++++++++++ fs/smb/common/smbdirect/smbdirect_socket.h | 2 ++ 3 files changed, 23 insertions(+) diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 904c4e5b9e5c..3d325d73364a 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -15,6 +15,7 @@ #include #include +#include "../common/smbdirect/smbdirect.h" #include "../common/smbdirect/smbdirect_socket.h" extern int rdma_readwrite_threshold; diff --git a/fs/smb/common/smbdirect/smbdirect.h b/fs/smb/common/smbdirect/smbdirect.h index eedbdf0d0433..b9a385344ff3 100644 --- a/fs/smb/common/smbdirect/smbdirect.h +++ b/fs/smb/common/smbdirect/smbdirect.h @@ -14,4 +14,24 @@ struct smbdirect_buffer_descriptor_v1 { __le32 length; } __packed; +/* + * Connection parameters mostly from [MS-SMBD] 3.1.1.1 + * + * These are setup and negotiated at the beginning of a + * connection and remain constant unless explicitly changed. + * + * Some values are important for the upper layer. + */ +struct smbdirect_socket_parameters { + __u16 recv_credit_max; + __u16 send_credit_target; + __u32 max_send_size; + __u32 max_fragmented_send_size; + __u32 max_recv_size; + __u32 max_fragmented_recv_size; + __u32 max_read_write_size; + __u32 keepalive_interval_msec; + __u32 keepalive_timeout_msec; +} __packed; + #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_H__ */ diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h index 69a55561f91a..e5b15cc44a7b 100644 --- a/fs/smb/common/smbdirect/smbdirect_socket.h +++ b/fs/smb/common/smbdirect/smbdirect_socket.h @@ -36,6 +36,8 @@ struct smbdirect_socket { struct ib_qp *qp; struct ib_device *dev; } ib; + + struct smbdirect_socket_parameters parameters; }; #endif /* __FS_SMB_COMMON_SMBDIRECT_SMBDIRECT_SOCKET_H__ */ From cc55f65dd352bdb7bdf8db1c36fb348c294c3b66 Mon Sep 17 00:00:00 2001 From: Stefan Metzmacher Date: Wed, 28 May 2025 18:01:40 +0200 Subject: [PATCH 08/14] smb: client: make use of common smbdirect_socket_parameters Cc: Steve French Cc: Tom Talpey Cc: Long Li Cc: Namjae Jeon Cc: Hyunchul Lee Cc: Meetakshi Setiya Cc: linux-cifs@vger.kernel.org Cc: samba-technical@lists.samba.org Signed-off-by: Stefan Metzmacher Signed-off-by: Steve French --- fs/smb/client/cifs_debug.c | 21 +++++---- fs/smb/client/smb2ops.c | 14 ++++-- fs/smb/client/smbdirect.c | 91 ++++++++++++++++++++++---------------- fs/smb/client/smbdirect.h | 10 +---- 4 files changed, 77 insertions(+), 59 deletions(-) diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 56b0b5c82dd1..c0196be0e65f 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -362,6 +362,10 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) c = 0; spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { +#ifdef CONFIG_CIFS_SMB_DIRECT + struct smbdirect_socket_parameters *sp; +#endif + /* channel info will be printed as a part of sessions below */ if (SERVER_IS_CHAN(server)) continue; @@ -383,6 +387,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) seq_printf(m, "\nSMBDirect transport not available"); goto skip_rdma; } + sp = &server->smbd_conn->socket.parameters; seq_printf(m, "\nSMBDirect (in hex) protocol version: %x " "transport status: %x", @@ -390,18 +395,18 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) server->smbd_conn->socket.status); seq_printf(m, "\nConn receive_credit_max: %x " "send_credit_target: %x max_send_size: %x", - server->smbd_conn->receive_credit_max, - server->smbd_conn->send_credit_target, - server->smbd_conn->max_send_size); + sp->recv_credit_max, + sp->send_credit_target, + sp->max_send_size); seq_printf(m, "\nConn max_fragmented_recv_size: %x " "max_fragmented_send_size: %x max_receive_size:%x", - server->smbd_conn->max_fragmented_recv_size, - server->smbd_conn->max_fragmented_send_size, - server->smbd_conn->max_receive_size); + sp->max_fragmented_recv_size, + sp->max_fragmented_send_size, + sp->max_recv_size); seq_printf(m, "\nConn keep_alive_interval: %x " "max_readwrite_size: %x rdma_readwrite_threshold: %x", - server->smbd_conn->keep_alive_interval, - server->smbd_conn->max_readwrite_size, + sp->keepalive_interval_msec * 1000, + sp->max_read_write_size, server->smbd_conn->rdma_readwrite_threshold); seq_printf(m, "\nDebug count_get_receive_buffer: %x " "count_put_receive_buffer: %x count_send_empty: %x", diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index bab9f567d9b7..1468c16ea9b8 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -504,6 +504,9 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) wsize = min_t(unsigned int, wsize, server->max_write); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { + struct smbdirect_socket_parameters *sp = + &server->smbd_conn->socket.parameters; + if (server->sign) /* * Account for SMB2 data transfer packet header and @@ -511,12 +514,12 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) */ wsize = min_t(unsigned int, wsize, - server->smbd_conn->max_fragmented_send_size - + sp->max_fragmented_send_size - SMB2_READWRITE_PDU_HEADER_SIZE - sizeof(struct smb2_transform_hdr)); else wsize = min_t(unsigned int, - wsize, server->smbd_conn->max_readwrite_size); + wsize, sp->max_read_write_size); } #endif if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) @@ -552,6 +555,9 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) rsize = min_t(unsigned int, rsize, server->max_read); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { + struct smbdirect_socket_parameters *sp = + &server->smbd_conn->socket.parameters; + if (server->sign) /* * Account for SMB2 data transfer packet header and @@ -559,12 +565,12 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) */ rsize = min_t(unsigned int, rsize, - server->smbd_conn->max_fragmented_recv_size - + sp->max_fragmented_recv_size - SMB2_READWRITE_PDU_HEADER_SIZE - sizeof(struct smb2_transform_hdr)); else rsize = min_t(unsigned int, - rsize, server->smbd_conn->max_readwrite_size); + rsize, sp->max_read_write_size); } #endif diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 608ecd2149ed..5ae847919da5 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -320,6 +320,8 @@ static bool process_negotiation_response( struct smbd_response *response, int packet_length) { struct smbd_connection *info = response->info; + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct smbdirect_negotiate_resp *packet = smbd_response_payload(response); if (packet_length < sizeof(struct smbdirect_negotiate_resp)) { @@ -349,20 +351,20 @@ static bool process_negotiation_response( atomic_set(&info->receive_credits, 0); - if (le32_to_cpu(packet->preferred_send_size) > info->max_receive_size) { + if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { log_rdma_event(ERR, "error: preferred_send_size=%d\n", le32_to_cpu(packet->preferred_send_size)); return false; } - info->max_receive_size = le32_to_cpu(packet->preferred_send_size); + sp->max_recv_size = le32_to_cpu(packet->preferred_send_size); if (le32_to_cpu(packet->max_receive_size) < SMBD_MIN_RECEIVE_SIZE) { log_rdma_event(ERR, "error: max_receive_size=%d\n", le32_to_cpu(packet->max_receive_size)); return false; } - info->max_send_size = min_t(int, info->max_send_size, - le32_to_cpu(packet->max_receive_size)); + sp->max_send_size = min_t(u32, sp->max_send_size, + le32_to_cpu(packet->max_receive_size)); if (le32_to_cpu(packet->max_fragmented_size) < SMBD_MIN_FRAGMENTED_SIZE) { @@ -370,18 +372,18 @@ static bool process_negotiation_response( le32_to_cpu(packet->max_fragmented_size)); return false; } - info->max_fragmented_send_size = + sp->max_fragmented_send_size = le32_to_cpu(packet->max_fragmented_size); info->rdma_readwrite_threshold = - rdma_readwrite_threshold > info->max_fragmented_send_size ? - info->max_fragmented_send_size : + rdma_readwrite_threshold > sp->max_fragmented_send_size ? + sp->max_fragmented_send_size : rdma_readwrite_threshold; - info->max_readwrite_size = min_t(u32, + sp->max_read_write_size = min_t(u32, le32_to_cpu(packet->max_readwrite_size), info->max_frmr_depth * PAGE_SIZE); - info->max_frmr_depth = info->max_readwrite_size / PAGE_SIZE; + info->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; return true; } @@ -689,6 +691,7 @@ out1: static int smbd_post_send_negotiate_req(struct smbd_connection *info) { struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc = -ENOMEM; struct smbd_request *request; @@ -704,11 +707,11 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) packet->min_version = cpu_to_le16(SMBDIRECT_V1); packet->max_version = cpu_to_le16(SMBDIRECT_V1); packet->reserved = 0; - packet->credits_requested = cpu_to_le16(info->send_credit_target); - packet->preferred_send_size = cpu_to_le32(info->max_send_size); - packet->max_receive_size = cpu_to_le32(info->max_receive_size); + packet->credits_requested = cpu_to_le16(sp->send_credit_target); + packet->preferred_send_size = cpu_to_le32(sp->max_send_size); + packet->max_receive_size = cpu_to_le32(sp->max_recv_size); packet->max_fragmented_size = - cpu_to_le32(info->max_fragmented_recv_size); + cpu_to_le32(sp->max_fragmented_recv_size); request->num_sge = 1; request->sge[0].addr = ib_dma_map_single( @@ -800,6 +803,7 @@ static int smbd_post_send(struct smbd_connection *info, struct smbd_request *request) { struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc, i; @@ -831,7 +835,7 @@ static int smbd_post_send(struct smbd_connection *info, } else /* Reset timer for idle connection after packet is sent */ mod_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); + msecs_to_jiffies(sp->keepalive_interval_msec)); return rc; } @@ -841,6 +845,7 @@ static int smbd_post_send_iter(struct smbd_connection *info, int *_remaining_data_length) { struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; int i, rc; int header_length; int data_length; @@ -868,7 +873,7 @@ wait_credit: wait_send_queue: wait_event(info->wait_post_send, - atomic_read(&info->send_pending) < info->send_credit_target || + atomic_read(&info->send_pending) < sp->send_credit_target || sc->status != SMBDIRECT_SOCKET_CONNECTED); if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { @@ -878,7 +883,7 @@ wait_send_queue: } if (unlikely(atomic_inc_return(&info->send_pending) > - info->send_credit_target)) { + sp->send_credit_target)) { atomic_dec(&info->send_pending); goto wait_send_queue; } @@ -917,7 +922,7 @@ wait_send_queue: /* Fill in the packet header */ packet = smbd_request_payload(request); - packet->credits_requested = cpu_to_le16(info->send_credit_target); + packet->credits_requested = cpu_to_le16(sp->send_credit_target); new_credits = manage_credits_prior_sending(info); atomic_add(new_credits, &info->receive_credits); @@ -1017,16 +1022,17 @@ static int smbd_post_recv( struct smbd_connection *info, struct smbd_response *response) { struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_recv_wr recv_wr; int rc = -EIO; response->sge.addr = ib_dma_map_single( sc->ib.dev, response->packet, - info->max_receive_size, DMA_FROM_DEVICE); + sp->max_recv_size, DMA_FROM_DEVICE); if (ib_dma_mapping_error(sc->ib.dev, response->sge.addr)) return rc; - response->sge.length = info->max_receive_size; + response->sge.length = sp->max_recv_size; response->sge.lkey = sc->ib.pd->local_dma_lkey; response->cqe.done = recv_done; @@ -1274,6 +1280,8 @@ static void idle_connection_timer(struct work_struct *work) struct smbd_connection *info = container_of( work, struct smbd_connection, idle_timer_work.work); + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; if (info->keep_alive_requested != KEEP_ALIVE_NONE) { log_keep_alive(ERR, @@ -1288,7 +1296,7 @@ static void idle_connection_timer(struct work_struct *work) /* Setup the next idle timeout work */ queue_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); + msecs_to_jiffies(sp->keepalive_interval_msec)); } /* @@ -1300,6 +1308,7 @@ void smbd_destroy(struct TCP_Server_Info *server) { struct smbd_connection *info = server->smbd_conn; struct smbdirect_socket *sc; + struct smbdirect_socket_parameters *sp; struct smbd_response *response; unsigned long flags; @@ -1308,6 +1317,7 @@ void smbd_destroy(struct TCP_Server_Info *server) return; } sc = &info->socket; + sp = &sc->parameters; log_rdma_event(INFO, "destroying rdma session\n"); if (sc->status != SMBDIRECT_SOCKET_DISCONNECTED) { @@ -1349,7 +1359,7 @@ void smbd_destroy(struct TCP_Server_Info *server) log_rdma_event(INFO, "free receive buffers\n"); wait_event(info->wait_receive_queues, info->count_receive_queue + info->count_empty_packet_queue - == info->receive_credit_max); + == sp->recv_credit_max); destroy_receive_buffers(info); /* @@ -1437,6 +1447,8 @@ static void destroy_caches_and_workqueue(struct smbd_connection *info) #define MAX_NAME_LEN 80 static int allocate_caches_and_workqueue(struct smbd_connection *info) { + struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; char name[MAX_NAME_LEN]; int rc; @@ -1451,7 +1463,7 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) return -ENOMEM; info->request_mempool = - mempool_create(info->send_credit_target, mempool_alloc_slab, + mempool_create(sp->send_credit_target, mempool_alloc_slab, mempool_free_slab, info->request_cache); if (!info->request_mempool) goto out1; @@ -1461,13 +1473,13 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) kmem_cache_create( name, sizeof(struct smbd_response) + - info->max_receive_size, + sp->max_recv_size, 0, SLAB_HWCACHE_ALIGN, NULL); if (!info->response_cache) goto out2; info->response_mempool = - mempool_create(info->receive_credit_max, mempool_alloc_slab, + mempool_create(sp->recv_credit_max, mempool_alloc_slab, mempool_free_slab, info->response_cache); if (!info->response_mempool) goto out3; @@ -1477,7 +1489,7 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (!info->workqueue) goto out4; - rc = allocate_receive_buffers(info, info->receive_credit_max); + rc = allocate_receive_buffers(info, sp->recv_credit_max); if (rc) { log_rdma_event(ERR, "failed to allocate receive buffers\n"); goto out5; @@ -1505,6 +1517,7 @@ static struct smbd_connection *_smbd_get_connection( int rc; struct smbd_connection *info; struct smbdirect_socket *sc; + struct smbdirect_socket_parameters *sp; struct rdma_conn_param conn_param; struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; @@ -1515,6 +1528,7 @@ static struct smbd_connection *_smbd_get_connection( if (!info) return NULL; sc = &info->socket; + sp = &sc->parameters; sc->status = SMBDIRECT_SOCKET_CONNECTING; rc = smbd_ia_open(info, dstaddr, port); @@ -1541,12 +1555,12 @@ static struct smbd_connection *_smbd_get_connection( goto config_failed; } - info->receive_credit_max = smbd_receive_credit_max; - info->send_credit_target = smbd_send_credit_target; - info->max_send_size = smbd_max_send_size; - info->max_fragmented_recv_size = smbd_max_fragmented_recv_size; - info->max_receive_size = smbd_max_receive_size; - info->keep_alive_interval = smbd_keep_alive_interval; + sp->recv_credit_max = smbd_receive_credit_max; + sp->send_credit_target = smbd_send_credit_target; + sp->max_send_size = smbd_max_send_size; + sp->max_fragmented_recv_size = smbd_max_fragmented_recv_size; + sp->max_recv_size = smbd_max_receive_size; + sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_MAX_SEND_SGE || sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_MAX_RECV_SGE) { @@ -1561,7 +1575,7 @@ static struct smbd_connection *_smbd_get_connection( sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, info, - info->send_credit_target, IB_POLL_SOFTIRQ); + sp->send_credit_target, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.send_cq)) { sc->ib.send_cq = NULL; goto alloc_cq_failed; @@ -1569,7 +1583,7 @@ static struct smbd_connection *_smbd_get_connection( sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, info, - info->receive_credit_max, IB_POLL_SOFTIRQ); + sp->recv_credit_max, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.recv_cq)) { sc->ib.recv_cq = NULL; goto alloc_cq_failed; @@ -1578,8 +1592,8 @@ static struct smbd_connection *_smbd_get_connection( memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smbd_qp_async_error_upcall; qp_attr.qp_context = info; - qp_attr.cap.max_send_wr = info->send_credit_target; - qp_attr.cap.max_recv_wr = info->receive_credit_max; + qp_attr.cap.max_send_wr = sp->send_credit_target; + qp_attr.cap.max_recv_wr = sp->recv_credit_max; qp_attr.cap.max_send_sge = SMBDIRECT_MAX_SEND_SGE; qp_attr.cap.max_recv_sge = SMBDIRECT_MAX_RECV_SGE; qp_attr.cap.max_inline_data = 0; @@ -1654,7 +1668,7 @@ static struct smbd_connection *_smbd_get_connection( init_waitqueue_head(&info->wait_send_queue); INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); queue_delayed_work(info->workqueue, &info->idle_timer_work, - info->keep_alive_interval*HZ); + msecs_to_jiffies(sp->keepalive_interval_msec)); init_waitqueue_head(&info->wait_send_pending); atomic_set(&info->send_pending, 0); @@ -1971,6 +1985,7 @@ int smbd_send(struct TCP_Server_Info *server, { struct smbd_connection *info = server->smbd_conn; struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; struct smb_rqst *rqst; struct iov_iter iter; unsigned int remaining_data_length, klen; @@ -1988,10 +2003,10 @@ int smbd_send(struct TCP_Server_Info *server, for (i = 0; i < num_rqst; i++) remaining_data_length += smb_rqst_len(server, &rqst_array[i]); - if (unlikely(remaining_data_length > info->max_fragmented_send_size)) { + if (unlikely(remaining_data_length > sp->max_fragmented_send_size)) { /* assertion: payload never exceeds negotiated maximum */ log_write(ERR, "payload size %d > max size %d\n", - remaining_data_length, info->max_fragmented_send_size); + remaining_data_length, sp->max_fragmented_send_size); return -EINVAL; } diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index 3d325d73364a..75b3f491c3ad 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -69,15 +69,7 @@ struct smbd_connection { spinlock_t lock_new_credits_offered; int new_credits_offered; - /* Connection parameters defined in [MS-SMBD] 3.1.1.1 */ - int receive_credit_max; - int send_credit_target; - int max_send_size; - int max_fragmented_recv_size; - int max_fragmented_send_size; - int max_receive_size; - int keep_alive_interval; - int max_readwrite_size; + /* dynamic connection parameters defined in [MS-SMBD] 3.1.1.1 */ enum keep_alive_status keep_alive_requested; int protocol; atomic_t send_credits; From 66d590b828b1fd9fa337047ae58fe1c4c6f43609 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 2 Jun 2025 22:37:12 +0530 Subject: [PATCH 09/14] cifs: deal with the channel loading lag while picking channels Our current approach to select a channel for sending requests is this: 1. iterate all channels to find the min and max queue depth 2. if min and max are not the same, pick the channel with min depth 3. if min and max are same, round robin, as all channels are equally loaded The problem with this approach is that there's a lag between selecting a channel and sending the request (that increases the queue depth on the channel). While these numbers will eventually catch up, there could be a skew in the channel usage, depending on the application's I/O parallelism and the server's speed of handling requests. With sufficient parallelism, this lag can artificially increase the queue depth, thereby impacting the performance negatively. This change will change the step 1 above to start the iteration from the last selected channel. This is to reduce the skew in channel usage even in the presence of this lag. Fixes: ea90708d3cf3 ("cifs: use the least loaded channel for sending requests") Cc: Signed-off-by: Shyam Prasad N Signed-off-by: Steve French --- fs/smb/client/transport.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index 266af17aa7d9..191783f553ce 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -1018,14 +1018,16 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) uint index = 0; unsigned int min_in_flight = UINT_MAX, max_in_flight = 0; struct TCP_Server_Info *server = NULL; - int i; + int i, start, cur; if (!ses) return NULL; spin_lock(&ses->chan_lock); + start = atomic_inc_return(&ses->chan_seq); for (i = 0; i < ses->chan_count; i++) { - server = ses->chans[i].server; + cur = (start + i) % ses->chan_count; + server = ses->chans[cur].server; if (!server || server->terminate) continue; @@ -1042,17 +1044,15 @@ struct TCP_Server_Info *cifs_pick_channel(struct cifs_ses *ses) */ if (server->in_flight < min_in_flight) { min_in_flight = server->in_flight; - index = i; + index = cur; } if (server->in_flight > max_in_flight) max_in_flight = server->in_flight; } /* if all channels are equally loaded, fall back to round-robin */ - if (min_in_flight == max_in_flight) { - index = (uint)atomic_inc_return(&ses->chan_seq); - index %= ses->chan_count; - } + if (min_in_flight == max_in_flight) + index = (uint)start % ses->chan_count; server = ses->chans[index].server; spin_unlock(&ses->chan_lock); From b5e3e6e28cf3853566ba5d816f79aba5be579158 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 2 Jun 2025 22:37:15 +0530 Subject: [PATCH 10/14] cifs: serialize other channels when query server interfaces is pending Today, during smb2_reconnect, session_mutex is released as soon as the tcon is reconnected and is in a good state. However, in case multichannel is enabled, there is also a query of server interfaces that follows. We've seen that this query can race with reconnects of other channels, causing them to step on each other with reconnects. This change extends the hold of session_mutex till after the query of server interfaces is complete. In order to avoid recursive smb2_reconnect checks during query ioctl, this change also introduces a session flag for sessions where such a query is in progress. Signed-off-by: Shyam Prasad N Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/cifsglob.h | 1 + fs/smb/client/smb2pdu.c | 24 ++++++++++++++++++------ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index ad7dd16db3e9..45e94e18f4d5 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1085,6 +1085,7 @@ struct cifs_chan { }; #define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) +#define CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES (0x2) /* * Session structure. One of these for each uid session with a particular host diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 2bd814cd612b..96b32d276071 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -412,14 +412,19 @@ skip_sess_setup: if (!rc && (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL) && server->ops->query_server_interfaces) { - mutex_unlock(&ses->session_mutex); - /* - * query server network interfaces, in case they change + * query server network interfaces, in case they change. + * Also mark the session as pending this update while the query + * is in progress. This will be used to avoid calling + * smb2_reconnect recursively. */ + ses->flags |= CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; xid = get_xid(); rc = server->ops->query_server_interfaces(xid, tcon, false); free_xid(xid); + ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; + + mutex_unlock(&ses->session_mutex); if (rc == -EOPNOTSUPP && ses->chan_count > 1) { /* @@ -561,11 +566,18 @@ static int smb2_ioctl_req_init(u32 opcode, struct cifs_tcon *tcon, struct TCP_Server_Info *server, void **request_buf, unsigned int *total_len) { - /* Skip reconnect only for FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs */ - if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) { + /* + * Skip reconnect in one of the following cases: + * 1. For FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs + * 2. For FSCTL_QUERY_NETWORK_INTERFACE_INFO IOCTL when called from + * smb2_reconnect (indicated by CIFS_SES_FLAG_SCALE_CHANNELS ses flag) + */ + if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO || + (opcode == FSCTL_QUERY_NETWORK_INTERFACE_INFO && + (tcon->ses->flags & CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES))) return __smb2_plain_req_init(SMB2_IOCTL, tcon, server, request_buf, total_len); - } + return smb2_plain_req_init(SMB2_IOCTL, tcon, server, request_buf, total_len); } From 42ca547b13a20e7cbb04fbdf8d5f089ac4bb35b7 Mon Sep 17 00:00:00 2001 From: Shyam Prasad N Date: Mon, 2 Jun 2025 22:37:17 +0530 Subject: [PATCH 11/14] cifs: do not disable interface polling on failure When a server has multichannel enabled, we keep polling the server for interfaces periodically. However, when this query fails, we disable the polling. This can be problematic as it takes away the chance for the server to start advertizing again. This change reschedules the delayed work, even if the current call failed. That way, multichannel sessions can recover. Signed-off-by: Shyam Prasad N Cc: stable@vger.kernel.org Signed-off-by: Steve French --- fs/smb/client/connect.c | 6 +----- fs/smb/client/smb2pdu.c | 9 +++++---- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 024817d40c5f..28bc33496623 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -116,13 +116,9 @@ static void smb2_query_server_interfaces(struct work_struct *work) rc = server->ops->query_server_interfaces(xid, tcon, false); free_xid(xid); - if (rc) { - if (rc == -EOPNOTSUPP) - return; - + if (rc) cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", __func__, rc); - } queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, (SMB_INTERFACE_POLL_INTERVAL * HZ)); diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index 96b32d276071..a717be1626a3 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -424,6 +424,10 @@ skip_sess_setup: free_xid(xid); ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; + /* regardless of rc value, setup polling */ + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); + mutex_unlock(&ses->session_mutex); if (rc == -EOPNOTSUPP && ses->chan_count > 1) { @@ -444,11 +448,8 @@ skip_sess_setup: if (ses->chan_max > ses->chan_count && ses->iface_count && !SERVER_IS_CHAN(server)) { - if (ses->chan_count == 1) { + if (ses->chan_count == 1) cifs_server_dbg(VFS, "supports multichannel now\n"); - queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, - (SMB_INTERFACE_POLL_INTERVAL * HZ)); - } cifs_try_adding_channels(ses); } From 1c6bbc45d856ac86be9c18194a8c5b7c56e41ebd Mon Sep 17 00:00:00 2001 From: Meetakshi Setiya Date: Tue, 3 Jun 2025 00:06:28 -0400 Subject: [PATCH 12/14] cifs: add documentation for smbdirect setup Document steps to use SMB over RDMA using the linux SMB client and KSMBD server Signed-off-by: Meetakshi Setiya Signed-off-by: Steve French --- Documentation/filesystems/smb/index.rst | 1 + Documentation/filesystems/smb/smbdirect.rst | 103 ++++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 Documentation/filesystems/smb/smbdirect.rst diff --git a/Documentation/filesystems/smb/index.rst b/Documentation/filesystems/smb/index.rst index 1c8597a679ab..6df23b0e45c8 100644 --- a/Documentation/filesystems/smb/index.rst +++ b/Documentation/filesystems/smb/index.rst @@ -8,3 +8,4 @@ CIFS ksmbd cifsroot + smbdirect diff --git a/Documentation/filesystems/smb/smbdirect.rst b/Documentation/filesystems/smb/smbdirect.rst new file mode 100644 index 000000000000..ca6927c0b2c0 --- /dev/null +++ b/Documentation/filesystems/smb/smbdirect.rst @@ -0,0 +1,103 @@ +.. SPDX-License-Identifier: GPL-2.0 + +=========================== +SMB Direct - SMB3 over RDMA +=========================== + +This document describes how to set up the Linux SMB client and server to +use RDMA. + +Overview +======== +The Linux SMB kernel client supports SMB Direct, which is a transport +scheme for SMB3 that uses RDMA (Remote Direct Memory Access) to provide +high throughput and low latencies by bypassing the traditional TCP/IP +stack. +SMB Direct on the Linux SMB client can be tested against KSMBD - a +kernel-space SMB server. + +Installation +============= +- Install an RDMA device. As long as the RDMA device driver is supported + by the kernel, it should work. This includes both software emulators (soft + RoCE, soft iWARP) and hardware devices (InfiniBand, RoCE, iWARP). + +- Install a kernel with SMB Direct support. The first kernel release to + support SMB Direct on both the client and server side is 5.15. Therefore, + a distribution compatible with kernel 5.15 or later is required. + +- Install cifs-utils, which provides the `mount.cifs` command to mount SMB + shares. + +- Configure the RDMA stack + + Make sure that your kernel configuration has RDMA support enabled. Under + Device Drivers -> Infiniband support, update the kernel configuration to + enable Infiniband support. + + Enable the appropriate IB HCA support or iWARP adapter support, + depending on your hardware. + + If you are using InfiniBand, enable IP-over-InfiniBand support. + + For soft RDMA, enable either the soft iWARP (`RDMA _SIW`) or soft RoCE + (`RDMA_RXE`) module. Install the `iproute2` package and use the + `rdma link add` command to load the module and create an + RDMA interface. + + e.g. if your local ethernet interface is `eth0`, you can use: + + .. code-block:: bash + + sudo rdma link add siw0 type siw netdev eth0 + +- Enable SMB Direct support for both the server and the client in the kernel + configuration. + + Server Setup + + .. code-block:: text + + Network File Systems ---> + SMB3 server support + [*] Support for SMB Direct protocol + + Client Setup + + .. code-block:: text + + Network File Systems ---> + SMB3 and CIFS support (advanced network filesystem) + [*] SMB Direct support + +- Build and install the kernel. SMB Direct support will be enabled in the + cifs.ko and ksmbd.ko modules. + +Setup and Usage +================ + +- Set up and start a KSMBD server as described in the `KSMBD documentation + `_. + Also add the "server multi channel support = yes" parameter to ksmbd.conf. + +- On the client, mount the share with `rdma` mount option to use SMB Direct + (specify a SMB version 3.0 or higher using `vers`). + + For example: + + .. code-block:: bash + + mount -t cifs //server/share /mnt/point -o vers=3.1.1,rdma + +- To verify that the mount is using SMB Direct, you can check dmesg for the + following log line after mounting: + + .. code-block:: text + + CIFS: VFS: RDMA transport established + + Or, verify `rdma` mount option for the share in `/proc/mounts`: + + .. code-block:: bash + + cat /proc/mounts | grep cifs From e889a450a661281847517ee64ef8219b05972347 Mon Sep 17 00:00:00 2001 From: Paulo Alcantara Date: Thu, 5 Jun 2025 10:22:16 -0300 Subject: [PATCH 13/14] MAINTAINERS, mailmap: Update Paulo Alcantara's email address Update my email address in MAINTAINERS and .mailmap files. Signed-off-by: Paulo Alcantara Signed-off-by: Steve French --- .mailmap | 6 ++++++ MAINTAINERS | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 9ad98690876a..97747a910a37 100644 --- a/.mailmap +++ b/.mailmap @@ -596,6 +596,12 @@ Paul Mackerras Paul Mackerras Paul Moore Paul Moore +Paulo Alcantara +Paulo Alcantara +Paulo Alcantara +Paulo Alcantara +Paulo Alcantara +Paulo Alcantara Pavankumar Kondeti Peter A Jonsson Peter Oruba diff --git a/MAINTAINERS b/MAINTAINERS index 77ede8c0e30d..f8ccc9c22e51 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5963,7 +5963,7 @@ X: drivers/clk/clkdev.c COMMON INTERNET FILE SYSTEM CLIENT (CIFS and SMB3) M: Steve French M: Steve French -R: Paulo Alcantara (DFS, global name space) +R: Paulo Alcantara (DFS, global name space) R: Ronnie Sahlberg (directory leases, sparse files) R: Shyam Prasad N (multichannel) R: Tom Talpey (RDMA, smbdirect) @@ -9228,7 +9228,7 @@ F: include/linux/iomap.h FILESYSTEMS [NETFS LIBRARY] M: David Howells -M: Paulo Alcantara +M: Paulo Alcantara L: netfs@lists.linux.dev L: linux-fsdevel@vger.kernel.org S: Supported From 8e9d6efccdd728fb1193e4faada45dff03773608 Mon Sep 17 00:00:00 2001 From: Steve French Date: Tue, 3 Jun 2025 23:21:09 -0500 Subject: [PATCH 14/14] cifs: update internal version number to 2.55 Signed-off-by: Steve French --- fs/smb/client/cifsfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index ca435a3841b8..b9ec9fe16a98 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 54 -#define CIFS_VERSION "2.54" +#define SMB3_PRODUCT_BUILD 55 +#define CIFS_VERSION "2.55" #endif /* _CIFSFS_H */