From 8c1624b63a7d24142a2bbc3a5ee7e95f004ea36e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 20 Feb 2025 13:18:30 +0200 Subject: [PATCH 01/12] nvme-tcp: fix possible UAF in nvme_tcp_poll nvme_tcp_poll() may race with the send path error handler because it may complete the request while it is actively being polled for completion, resulting in a UAF panic [1]: We should make sure to stop polling when we see an error when trying to read from the socket. Hence make sure to propagate the error so that the block layer breaks the polling cycle. [1]: -- [35665.692310] nvme nvme2: failed to send request -13 [35665.702265] nvme nvme2: unsupported pdu type (3) [35665.702272] BUG: kernel NULL pointer dereference, address: 0000000000000000 [35665.702542] nvme nvme2: queue 1 receive failed: -22 [35665.703209] #PF: supervisor write access in kernel mode [35665.703213] #PF: error_code(0x0002) - not-present page [35665.703214] PGD 8000003801cce067 P4D 8000003801cce067 PUD 37e6f79067 PMD 0 [35665.703220] Oops: 0002 [#1] SMP PTI [35665.703658] nvme nvme2: starting error recovery [35665.705809] Hardware name: Inspur aaabbb/YZMB-00882-104, BIOS 4.1.26 09/22/2022 [35665.705812] Workqueue: kblockd blk_mq_requeue_work [35665.709172] RIP: 0010:_raw_spin_lock+0xc/0x30 [35665.715788] Call Trace: [35665.716201] [35665.716613] ? show_trace_log_lvl+0x1c1/0x2d9 [35665.717049] ? show_trace_log_lvl+0x1c1/0x2d9 [35665.717457] ? blk_mq_request_bypass_insert+0x2c/0xb0 [35665.717950] ? __die_body.cold+0x8/0xd [35665.718361] ? page_fault_oops+0xac/0x140 [35665.718749] ? blk_mq_start_request+0x30/0xf0 [35665.719144] ? nvme_tcp_queue_rq+0xc7/0x170 [nvme_tcp] [35665.719547] ? exc_page_fault+0x62/0x130 [35665.719938] ? asm_exc_page_fault+0x22/0x30 [35665.720333] ? _raw_spin_lock+0xc/0x30 [35665.720723] blk_mq_request_bypass_insert+0x2c/0xb0 [35665.721101] blk_mq_requeue_work+0xa5/0x180 [35665.721451] process_one_work+0x1e8/0x390 [35665.721809] worker_thread+0x53/0x3d0 [35665.722159] ? process_one_work+0x390/0x390 [35665.722501] kthread+0x124/0x150 [35665.722849] ? set_kthread_struct+0x50/0x50 [35665.723182] ret_from_fork+0x1f/0x30 Reported-by: Zhang Guanghui Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8a9131c95a3d..8c14018201db 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2699,6 +2699,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) { struct nvme_tcp_queue *queue = hctx->driver_data; struct sock *sk = queue->sock->sk; + int ret; if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) return 0; @@ -2706,9 +2707,9 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx, struct io_comp_batch *iob) set_bit(NVME_TCP_Q_POLLING, &queue->flags); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, true); - nvme_tcp_try_recv(queue); + ret = nvme_tcp_try_recv(queue); clear_bit(NVME_TCP_Q_POLLING, &queue->flags); - return queue->nr_cqe; + return ret < 0 ? ret : queue->nr_cqe; } static int nvme_tcp_get_address(struct nvme_ctrl *ctrl, char *buf, int size) From 6a3572e10f740acd48e2713ef37e92186a3ce5e8 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 13 Feb 2025 01:04:43 +0800 Subject: [PATCH 02/12] nvme-pci: clean up CMBMSC when registering CMB fails CMB decoding should get disabled when the CMB block isn't successfully registered to P2P DMA subsystem. Clean up the CMBMSC register in this error handling codepath to disable CMB decoding (and CMBLOC/CMBSZ registers). Signed-off-by: Icenowy Zheng Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 950289405ef2..218506e3dabe 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2003,6 +2003,7 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (pci_p2pdma_add_resource(pdev, bar, size, offset)) { dev_warn(dev->ctrl.device, "failed to register the CMB\n"); + hi_lo_writeq(0, dev->bar + NVME_REG_CMBMSC); return; } From 56cf7ef0d490b28fad8f8629fc135c5ab7c9f54e Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Thu, 13 Feb 2025 01:04:44 +0800 Subject: [PATCH 03/12] nvme-pci: skip CMB blocks incompatible with PCI P2P DMA The PCI P2PDMA code will register the CMB block to the memory hot-plugging subsystem, which have an alignment requirement. Memory blocks that do not satisfy this alignment requirement (usually 2MB) will lead to a WARNING from memory hotplugging. Verify the CMB block's address and size against the alignment and only try to send CMB blocks compatible with it to prevent this warning. Tested on Intel DC D4502 SSD, which has a 512K CMB block that is too small for memory hotplugging (thus PCI P2PDMA). Signed-off-by: Icenowy Zheng Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 218506e3dabe..640590b21728 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1982,6 +1982,18 @@ static void nvme_map_cmb(struct nvme_dev *dev) if (offset > bar_size) return; + /* + * Controllers may support a CMB size larger than their BAR, for + * example, due to being behind a bridge. Reduce the CMB to the + * reported size of the BAR + */ + size = min(size, bar_size - offset); + + if (!IS_ALIGNED(size, memremap_compat_align()) || + !IS_ALIGNED(pci_resource_start(pdev, bar), + memremap_compat_align())) + return; + /* * Tell the controller about the host side address mapping the CMB, * and enable CMB decoding for the NVMe 1.4+ scheme: @@ -1992,14 +2004,6 @@ static void nvme_map_cmb(struct nvme_dev *dev) dev->bar + NVME_REG_CMBMSC); } - /* - * Controllers may support a CMB size larger than their BAR, - * for example, due to being behind a bridge. Reduce the CMB to - * the reported size of the BAR - */ - if (size > bar_size - offset) - size = bar_size - offset; - if (pci_p2pdma_add_resource(pdev, bar, size, offset)) { dev_warn(dev->ctrl.device, "failed to register the CMB\n"); From 00817f0f1c45b007965f5676b9a2013bb39c7228 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 24 Feb 2025 17:13:30 -0800 Subject: [PATCH 04/12] nvme-ioctl: fix leaked requests on mapping error All the callers assume nvme_map_user_request() frees the request on a failure. This wasn't happening on invalid metadata or io_uring command flags, so we've been leaking those requests. Fixes: 23fd22e55b767b ("nvme: wire up fixed buffer support for nvme passthrough") Fixes: 7c2fd76048e95d ("nvme: fix metadata handling in nvme-passthrough") Reviewed-by: Damien Le Moal Reviewed-by: Kanchan Joshi Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/ioctl.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c index b1b46c2713e1..24e2c702da7a 100644 --- a/drivers/nvme/host/ioctl.c +++ b/drivers/nvme/host/ioctl.c @@ -128,8 +128,10 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, if (!nvme_ctrl_sgl_supported(ctrl)) dev_warn_once(ctrl->device, "using unchecked data buffer\n"); if (has_metadata) { - if (!supports_metadata) - return -EINVAL; + if (!supports_metadata) { + ret = -EINVAL; + goto out; + } if (!nvme_ctrl_meta_sgl_supported(ctrl)) dev_warn_once(ctrl->device, "using unchecked metadata buffer\n"); @@ -139,8 +141,10 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer, struct iov_iter iter; /* fixedbufs is only for non-vectored io */ - if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) - return -EINVAL; + if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) { + ret = -EINVAL; + goto out; + } ret = io_uring_cmd_import_fixed(ubuffer, bufflen, rq_data_dir(req), &iter, ioucmd); if (ret < 0) From 0979ff3676b1b4e6a20970bc265491d23c2da42b Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Thu, 27 Feb 2025 20:00:05 +0100 Subject: [PATCH 05/12] nvmet: remove old function prototype nvmet_subsys_nsid_exists() doesn't exist anymore Fixes: 74d16965d7ac ("nvmet-loop: avoid using mutex in IO hotpath") Signed-off-by: Maurizio Lombardi Signed-off-by: Keith Busch --- drivers/nvme/target/nvmet.h | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index d2c1233981e1..fcf4f460dc9a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -647,7 +647,6 @@ void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, struct nvmet_host *host); void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, u8 event_info, u8 log_page); -bool nvmet_subsys_nsid_exists(struct nvmet_subsys *subsys, u32 nsid); #define NVMET_MIN_QUEUE_SIZE 16 #define NVMET_MAX_QUEUE_SIZE 1024 From afb41b08c44e5386f2f52fa859010ac4afd2b66f Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Mon, 24 Feb 2025 15:40:58 +0100 Subject: [PATCH 06/12] nvme-tcp: Fix a C2HTermReq error message In H2CTermReq, a FES with value 0x05 means "R2T Limit Exceeded"; but in C2HTermReq the same value has a different meaning (Data Transfer Limit Exceeded). Fixes: 84e009042d0f ("nvme-tcp: add basic support for the C2HTermReq PDU") Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 8c14018201db..1094cbbec169 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -775,7 +775,7 @@ static void nvme_tcp_handle_c2h_term(struct nvme_tcp_queue *queue, [NVME_TCP_FES_PDU_SEQ_ERR] = "PDU Sequence Error", [NVME_TCP_FES_HDR_DIGEST_ERR] = "Header Digest Error", [NVME_TCP_FES_DATA_OUT_OF_RANGE] = "Data Transfer Out Of Range", - [NVME_TCP_FES_R2T_LIMIT_EXCEEDED] = "R2T Limit Exceeded", + [NVME_TCP_FES_DATA_LIMIT_EXCEEDED] = "Data Transfer Limit Exceeded", [NVME_TCP_FES_UNSUPPORTED_PARAM] = "Unsupported Parameter", }; From ad95bab0cd28ed77c2c0d0b6e76e03e031391064 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 26 Feb 2025 14:42:18 +0100 Subject: [PATCH 07/12] nvme-tcp: fix potential memory corruption in nvme_tcp_recv_pdu() nvme_tcp_recv_pdu() doesn't check the validity of the header length. When header digests are enabled, a target might send a packet with an invalid header length (e.g. 255), causing nvme_tcp_verify_hdgst() to access memory outside the allocated area and cause memory corruptions by overwriting it with the calculated digest. Fix this by rejecting packets with an unexpected header length. Fixes: 3f2304f8c6d6 ("nvme-tcp: add NVMe over TCP host driver") Signed-off-by: Maurizio Lombardi Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 1094cbbec169..23f11527d29d 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -217,6 +217,19 @@ static inline int nvme_tcp_queue_id(struct nvme_tcp_queue *queue) return queue - queue->ctrl->queues; } +static inline bool nvme_tcp_recv_pdu_supported(enum nvme_tcp_pdu_type type) +{ + switch (type) { + case nvme_tcp_c2h_term: + case nvme_tcp_c2h_data: + case nvme_tcp_r2t: + case nvme_tcp_rsp: + return true; + default: + return false; + } +} + /* * Check if the queue is TLS encrypted */ @@ -818,6 +831,16 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, return 0; hdr = queue->pdu; + if (unlikely(hdr->hlen != sizeof(struct nvme_tcp_rsp_pdu))) { + if (!nvme_tcp_recv_pdu_supported(hdr->type)) + goto unsupported_pdu; + + dev_err(queue->ctrl->ctrl.device, + "pdu type %d has unexpected header length (%d)\n", + hdr->type, hdr->hlen); + return -EPROTO; + } + if (unlikely(hdr->type == nvme_tcp_c2h_term)) { /* * C2HTermReq never includes Header or Data digests. @@ -850,10 +873,13 @@ static int nvme_tcp_recv_pdu(struct nvme_tcp_queue *queue, struct sk_buff *skb, nvme_tcp_init_recv_ctx(queue); return nvme_tcp_handle_r2t(queue, (void *)queue->pdu); default: - dev_err(queue->ctrl->ctrl.device, - "unsupported pdu type (%d)\n", hdr->type); - return -EINVAL; + goto unsupported_pdu; } + +unsupported_pdu: + dev_err(queue->ctrl->ctrl.device, + "unsupported pdu type (%d)\n", hdr->type); + return -EINVAL; } static inline void nvme_tcp_end_request(struct request *rq, u16 status) From a16f88964c647103dad7743a484b216d488a6352 Mon Sep 17 00:00:00 2001 From: Meir Elisha Date: Wed, 26 Feb 2025 09:28:12 +0200 Subject: [PATCH 08/12] nvmet-tcp: Fix a possible sporadic response drops in weakly ordered arch The order in which queue->cmd and rcv_state are updated is crucial. If these assignments are reordered by the compiler, the worker might not get queued in nvmet_tcp_queue_response(), hanging the IO. to enforce the the correct reordering, set rcv_state using smp_store_release(). Fixes: bdaf13279192 ("nvmet-tcp: fix a segmentation fault during io parsing error") Signed-off-by: Meir Elisha Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/target/tcp.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 7c51c2a8c109..4f9cac8a5abe 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -571,10 +571,16 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) struct nvmet_tcp_cmd *cmd = container_of(req, struct nvmet_tcp_cmd, req); struct nvmet_tcp_queue *queue = cmd->queue; + enum nvmet_tcp_recv_state queue_state; + struct nvmet_tcp_cmd *queue_cmd; struct nvme_sgl_desc *sgl; u32 len; - if (unlikely(cmd == queue->cmd)) { + /* Pairs with store_release in nvmet_prepare_receive_pdu() */ + queue_state = smp_load_acquire(&queue->rcv_state); + queue_cmd = READ_ONCE(queue->cmd); + + if (unlikely(cmd == queue_cmd)) { sgl = &cmd->req.cmd->common.dptr.sgl; len = le32_to_cpu(sgl->length); @@ -583,7 +589,7 @@ static void nvmet_tcp_queue_response(struct nvmet_req *req) * Avoid using helpers, this might happen before * nvmet_req_init is completed. */ - if (queue->rcv_state == NVMET_TCP_RECV_PDU && + if (queue_state == NVMET_TCP_RECV_PDU && len && len <= cmd->req.port->inline_data_size && nvme_is_write(cmd->req.cmd)) return; @@ -847,8 +853,9 @@ static void nvmet_prepare_receive_pdu(struct nvmet_tcp_queue *queue) { queue->offset = 0; queue->left = sizeof(struct nvme_tcp_hdr); - queue->cmd = NULL; - queue->rcv_state = NVMET_TCP_RECV_PDU; + WRITE_ONCE(queue->cmd, NULL); + /* Ensure rcv_state is visible only after queue->cmd is set */ + smp_store_release(&queue->rcv_state, NVMET_TCP_RECV_PDU); } static void nvmet_tcp_free_crypto(struct nvmet_tcp_queue *queue) From 5ac60242b0173be83709603ebaf27a473f16c4e4 Mon Sep 17 00:00:00 2001 From: Uday Shankar Date: Tue, 4 Mar 2025 14:34:26 -0700 Subject: [PATCH 09/12] ublk: set_params: properly check if parameters can be applied The parameters set by the set_params call are only applied to the block device in the start_dev call. So if a device has already been started, a subsequently issued set_params on that device will not have the desired effect, and should return an error. There is an existing check for this - set_params fails on devices in the LIVE state. But this check is not sufficient to cover the recovery case. In this case, the device will be in the QUIESCED or FAIL_IO states, so set_params will succeed. But this success is misleading, because the parameters will not be applied, since the device has already been started (by a previous ublk server). The bit UB_STATE_USED is set on completion of the start_dev; use it to detect and fail set_params commands which arrive too late to be applied (after start_dev). Signed-off-by: Uday Shankar Fixes: 0aa73170eba5 ("ublk_drv: add SET_PARAMS/GET_PARAMS control command") Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/20250304-set_params-v1-1-17b5e0887606@purestorage.com Signed-off-by: Jens Axboe --- drivers/block/ublk_drv.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index 529085181f35..ca9a67b5b537 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -2715,9 +2715,12 @@ static int ublk_ctrl_set_params(struct ublk_device *ub, if (ph.len > sizeof(struct ublk_params)) ph.len = sizeof(struct ublk_params); - /* parameters can only be changed when device isn't live */ mutex_lock(&ub->mutex); - if (ub->dev_info.state == UBLK_S_DEV_LIVE) { + if (test_bit(UB_STATE_USED, &ub->state)) { + /* + * Parameters can only be changed when device hasn't + * been started yet + */ ret = -EACCES; } else if (copy_from_user(&ub->params, argp, ph.len)) { ret = -EFAULT; From e06472bab2a5393430cc2fbc3211cd3602422c1e Mon Sep 17 00:00:00 2001 From: Olivier Gayot Date: Wed, 5 Mar 2025 10:21:54 +0800 Subject: [PATCH 10/12] block: fix conversion of GPT partition name to 7-bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The utf16_le_to_7bit function claims to, naively, convert a UTF-16 string to a 7-bit ASCII string. By naively, we mean that it: * drops the first byte of every character in the original UTF-16 string * checks if all characters are printable, and otherwise replaces them by exclamation mark "!". This means that theoretically, all characters outside the 7-bit ASCII range should be replaced by another character. Examples: * lower-case alpha (ɒ) 0x0252 becomes 0x52 (R) * ligature OE (œ) 0x0153 becomes 0x53 (S) * hangul letter pieup (ㅂ) 0x3142 becomes 0x42 (B) * upper-case gamma (Ɣ) 0x0194 becomes 0x94 (not printable) so gets replaced by "!" The result of this conversion for the GPT partition name is passed to user-space as PARTNAME via udev, which is confusing and feels questionable. However, there is a flaw in the conversion function itself. By dropping one byte of each character and using isprint() to check if the remaining byte corresponds to a printable character, we do not actually guarantee that the resulting character is 7-bit ASCII. This happens because we pass 8-bit characters to isprint(), which in the kernel returns 1 for many values > 0x7f - as defined in ctype.c. This results in many values which should be replaced by "!" to be kept as-is, despite not being valid 7-bit ASCII. Examples: * e with acute accent (é) 0x00E9 becomes 0xE9 - kept as-is because isprint(0xE9) returns 1. * euro sign (€) 0x20AC becomes 0xAC - kept as-is because isprint(0xAC) returns 1. This way has broken pyudev utility[1], fixes it by using a mask of 7 bits instead of 8 bits before calling isprint. Link: https://github.com/pyudev/pyudev/issues/490#issuecomment-2685794648 [1] Link: https://lore.kernel.org/linux-block/4cac90c2-e414-4ebb-ae62-2a4589d9dc6e@canonical.com/ Cc: Mulhern Cc: Davidlohr Bueso Cc: stable@vger.kernel.org Signed-off-by: Olivier Gayot Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20250305022154.3903128-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/partitions/efi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 5e9be13a56a8..7acba66eed48 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -682,7 +682,7 @@ static void utf16_le_to_7bit(const __le16 *in, unsigned int size, u8 *out) out[size] = 0; while (i < size) { - u8 c = le16_to_cpu(in[i]) & 0xff; + u8 c = le16_to_cpu(in[i]) & 0x7f; if (c && !isprint(c)) c = '!'; From 528361c49962708a60f51a1afafeb00987cebedf Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 5 Mar 2025 18:52:59 +0300 Subject: [PATCH 11/12] nvme-tcp: fix signedness bug in nvme_tcp_init_connection() The kernel_recvmsg() function returns an int which could be either negative error codes or the number of bytes received. The problem is that the condition: if (ret < sizeof(*icresp)) { is type promoted to type unsigned long and negative values are treated as high positive values which is success, when they should be treated as failure. Handle invalid positive returns separately from negative error codes to avoid this problem. Fixes: 578539e09690 ("nvme-tcp: fix connect failure on receiving partial ICResp PDU") Signed-off-by: Dan Carpenter Reviewed-by: Caleb Sander Mateos Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Keith Busch --- drivers/nvme/host/tcp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 23f11527d29d..327f3f2f5399 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1521,11 +1521,11 @@ static int nvme_tcp_init_connection(struct nvme_tcp_queue *queue) msg.msg_flags = MSG_WAITALL; ret = kernel_recvmsg(queue->sock, &msg, &iov, 1, iov.iov_len, msg.msg_flags); - if (ret < sizeof(*icresp)) { + if (ret >= 0 && ret < sizeof(*icresp)) + ret = -ECONNRESET; + if (ret < 0) { pr_warn("queue %d: failed to receive icresp, error %d\n", nvme_tcp_queue_id(queue), ret); - if (ret >= 0) - ret = -ECONNRESET; goto free_icresp; } ret = -ENOTCONN; From e7112524e5e885181cc5ae4d258f33b9dbe0b907 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 6 Mar 2025 08:27:51 -0800 Subject: [PATCH 12/12] block: Name the RQF flags enum Commit 5f89154e8e9e3445f9b59 ("block: Use enum to define RQF_x bit indexes") converted the RQF flags to an anonymous enum, which was a beneficial change. This patch goes one step further by naming the enum as "rqf_flags". This naming enables exporting these flags to BPF clients, eliminating the need to duplicate these flags in BPF code. Instead, BPF clients can now access the same kernel-side values through CO:RE (Compile Once, Run Everywhere), as shown in this example: rqf_stats = bpf_core_enum_value(enum rqf_flags, __RQF_STATS) Suggested-by: Yonghong Song Signed-off-by: Breno Leitao Link: https://lore.kernel.org/r/20250306-rqf_flags-v1-1-bbd64918b406@debian.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fa2a76cc2f73..71f4f0cc3dac 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -28,7 +28,7 @@ typedef enum rq_end_io_ret (rq_end_io_fn)(struct request *, blk_status_t); typedef __u32 __bitwise req_flags_t; /* Keep rqf_name[] in sync with the definitions below */ -enum { +enum rqf_flags { /* drive already may have started this one */ __RQF_STARTED, /* request for flush sequence */