2017-06-20 09:14:15 +03:00
|
|
|
/*
|
|
|
|
* Copyright (c) 2017 Mellanox Technologies. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions are met:
|
|
|
|
*
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
* 3. Neither the names of the copyright holders nor the names of its
|
|
|
|
* contributors may be used to endorse or promote products derived from
|
|
|
|
* this software without specific prior written permission.
|
|
|
|
*
|
|
|
|
* Alternatively, this software may be distributed under the terms of the
|
|
|
|
* GNU General Public License ("GPL") version 2 as published by the Free
|
|
|
|
* Software Foundation.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
|
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
|
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
|
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
|
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
|
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
|
|
* POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
2017-08-14 14:57:39 -06:00
|
|
|
#include <linux/module.h>
|
2018-01-28 11:17:24 +02:00
|
|
|
#include <linux/pid.h>
|
|
|
|
#include <linux/pid_namespace.h>
|
2019-02-15 11:03:53 -08:00
|
|
|
#include <linux/mutex.h>
|
2017-06-20 09:59:14 +03:00
|
|
|
#include <net/netlink.h>
|
2018-03-01 13:57:44 -08:00
|
|
|
#include <rdma/rdma_cm.h>
|
2017-06-20 09:14:15 +03:00
|
|
|
#include <rdma/rdma_netlink.h>
|
|
|
|
|
|
|
|
#include "core_priv.h"
|
2018-03-01 13:57:44 -08:00
|
|
|
#include "cma_priv.h"
|
2019-02-18 22:25:47 +02:00
|
|
|
#include "restrack.h"
|
2020-01-08 19:21:58 +02:00
|
|
|
#include "uverbs.h"
|
2017-06-20 09:14:15 +03:00
|
|
|
|
2023-10-09 13:43:58 +03:00
|
|
|
/*
 * Controls whether a non-privileged user may specify a controlled QKEY.
 * When true, non-privileged users are allowed to do so; when false (the
 * default for a zero-initialized static), they are not.
 */
static bool privileged_qkey;
|
|
|
|
|
2019-10-16 09:23:06 +03:00
|
|
|
/*
 * Signature shared by the per-resource fill callbacks in this file
 * (e.g. fill_res_qp_entry(), fill_res_cm_id_entry()).
 */
typedef int (*res_fill_func_t)(struct sk_buff *msg, bool has_cap_net_admin,
			       struct rdma_restrack_entry *res,
			       uint32_t port);
|
|
|
|
|
2019-06-19 09:20:49 -04:00
|
|
|
/*
|
|
|
|
* Sort array elements by the netlink attribute name
|
|
|
|
*/
|
2017-06-20 09:59:14 +03:00
|
|
|
static const struct nla_policy nldev_policy[RDMA_NLDEV_ATTR_MAX] = {
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_CHARDEV] = { .type = NLA_U64 },
|
|
|
|
[RDMA_NLDEV_ATTR_CHARDEV_ABI] = { .type = NLA_U64 },
|
|
|
|
[RDMA_NLDEV_ATTR_CHARDEV_NAME] = { .type = NLA_NUL_STRING,
|
2019-06-21 17:00:44 -04:00
|
|
|
.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_CHARDEV_TYPE] = { .type = NLA_NUL_STRING,
|
2019-06-21 17:00:44 -04:00
|
|
|
.len = RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE },
|
2019-07-08 13:59:04 +03:00
|
|
|
[RDMA_NLDEV_ATTR_DEV_DIM] = { .type = NLA_U8 },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_DEV_INDEX] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING,
|
2019-06-21 17:00:44 -04:00
|
|
|
.len = IB_DEVICE_NAME_MAX },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_DEV_NODE_TYPE] = { .type = NLA_U8 },
|
|
|
|
[RDMA_NLDEV_ATTR_DEV_PROTOCOL] = { .type = NLA_NUL_STRING,
|
2019-06-21 17:00:44 -04:00
|
|
|
.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_DRIVER] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_DRIVER_ENTRY] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE] = { .type = NLA_U8 },
|
|
|
|
[RDMA_NLDEV_ATTR_DRIVER_STRING] = { .type = NLA_NUL_STRING,
|
2019-06-21 17:00:44 -04:00
|
|
|
.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_DRIVER_S32] = { .type = NLA_S32 },
|
|
|
|
[RDMA_NLDEV_ATTR_DRIVER_S64] = { .type = NLA_S64 },
|
|
|
|
[RDMA_NLDEV_ATTR_DRIVER_U32] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_DRIVER_U64] = { .type = NLA_U64 },
|
|
|
|
[RDMA_NLDEV_ATTR_FW_VERSION] = { .type = NLA_NUL_STRING,
|
2019-06-21 17:00:44 -04:00
|
|
|
.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_LID] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_LINK_TYPE] = { .type = NLA_NUL_STRING,
|
2019-06-21 17:00:44 -04:00
|
|
|
.len = IFNAMSIZ },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_LMC] = { .type = NLA_U8 },
|
|
|
|
[RDMA_NLDEV_ATTR_NDEV_INDEX] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_NDEV_NAME] = { .type = NLA_NUL_STRING,
|
|
|
|
.len = IFNAMSIZ },
|
|
|
|
[RDMA_NLDEV_ATTR_NODE_GUID] = { .type = NLA_U64 },
|
|
|
|
[RDMA_NLDEV_ATTR_PORT_INDEX] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_PORT_PHYS_STATE] = { .type = NLA_U8 },
|
|
|
|
[RDMA_NLDEV_ATTR_PORT_STATE] = { .type = NLA_U8 },
|
2018-03-01 13:57:44 -08:00
|
|
|
[RDMA_NLDEV_ATTR_RES_CM_ID] = { .type = NLA_NESTED },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_CM_IDN] = { .type = NLA_U32 },
|
2018-03-01 13:57:44 -08:00
|
|
|
[RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY] = { .type = NLA_NESTED },
|
2018-03-01 13:57:51 -08:00
|
|
|
[RDMA_NLDEV_ATTR_RES_CQ] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_CQE] = { .type = NLA_U32 },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_CQN] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_CQ_ENTRY] = { .type = NLA_NESTED },
|
2021-04-18 16:41:23 +03:00
|
|
|
[RDMA_NLDEV_ATTR_RES_CTX] = { .type = NLA_NESTED },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_CTXN] = { .type = NLA_U32 },
|
2021-04-18 16:41:23 +03:00
|
|
|
[RDMA_NLDEV_ATTR_RES_CTX_ENTRY] = { .type = NLA_NESTED },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_DST_ADDR] = {
|
|
|
|
.len = sizeof(struct __kernel_sockaddr_storage) },
|
2018-03-01 13:58:13 -08:00
|
|
|
[RDMA_NLDEV_ATTR_RES_IOVA] = { .type = NLA_U64 },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_KERN_NAME] = { .type = NLA_NUL_STRING,
|
2019-06-21 17:00:44 -04:00
|
|
|
.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_LKEY] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_LQPN] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_MR] = { .type = NLA_NESTED },
|
2018-03-01 13:58:13 -08:00
|
|
|
[RDMA_NLDEV_ATTR_RES_MRLEN] = { .type = NLA_U64 },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_MRN] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_MR_ENTRY] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE] = { .type = NLA_U8 },
|
2018-03-01 13:58:28 -08:00
|
|
|
[RDMA_NLDEV_ATTR_RES_PD] = { .type = NLA_NESTED },
|
2019-02-18 22:25:49 +02:00
|
|
|
[RDMA_NLDEV_ATTR_RES_PDN] = { .type = NLA_U32 },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_PD_ENTRY] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_PID] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_POLL_CTX] = { .type = NLA_U8 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_PS] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_QP] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_QP_ENTRY] = { .type = NLA_NESTED },
|
RDMA: Add support to dump resource tracker in RAW format
Add support to get resource dump in raw format. It enable drivers to
return the entire device specific QP/CQ/MR context without a need from the
driver to set each field separately.
The raw query returns only the device specific data, general data is still
returned by using the existing queries.
Example:
$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]
Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2020-06-23 14:30:40 +03:00
|
|
|
[RDMA_NLDEV_ATTR_RES_RAW] = { .type = NLA_BINARY },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_RKEY] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_RQPN] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_RQ_PSN] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_SQ_PSN] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_SRC_ADDR] = {
|
|
|
|
.len = sizeof(struct __kernel_sockaddr_storage) },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_STATE] = { .type = NLA_U8 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_SUMMARY] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR]= { .type = NLA_U64 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME]= { .type = NLA_NUL_STRING,
|
2019-06-21 17:00:44 -04:00
|
|
|
.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_TYPE] = { .type = NLA_U8 },
|
2024-04-16 15:03:50 +03:00
|
|
|
[RDMA_NLDEV_ATTR_RES_SUBTYPE] = { .type = NLA_NUL_STRING,
|
|
|
|
.len = RDMA_NLDEV_ATTR_EMPTY_STRING },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY]= { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_USECNT] = { .type = NLA_U64 },
|
2021-04-18 16:41:25 +03:00
|
|
|
[RDMA_NLDEV_ATTR_RES_SRQ] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_SRQN] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_RES_SRQ_ENTRY] = { .type = NLA_NESTED },
|
2021-04-18 16:41:26 +03:00
|
|
|
[RDMA_NLDEV_ATTR_MIN_RANGE] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_MAX_RANGE] = { .type = NLA_U32 },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_SM_LID] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_SUBNET_PREFIX] = { .type = NLA_U64 },
|
2019-07-02 13:02:39 +03:00
|
|
|
[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_STAT_MODE] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_STAT_RES] = { .type = NLA_U32 },
|
2019-07-02 13:02:40 +03:00
|
|
|
[RDMA_NLDEV_ATTR_STAT_COUNTER] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY] = { .type = NLA_NESTED },
|
|
|
|
[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME] = { .type = NLA_NUL_STRING },
|
|
|
|
[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE] = { .type = NLA_U64 },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_ATTR_SYS_IMAGE_GUID] = { .type = NLA_U64 },
|
2019-06-13 21:38:19 -03:00
|
|
|
[RDMA_NLDEV_ATTR_UVERBS_DRIVER_ID] = { .type = NLA_U32 },
|
2019-06-19 09:20:49 -04:00
|
|
|
[RDMA_NLDEV_NET_NS_FD] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_SYS_ATTR_NETNS_MODE] = { .type = NLA_U8 },
|
2021-04-18 15:10:25 +03:00
|
|
|
[RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK] = { .type = NLA_U8 },
|
2021-10-08 15:24:33 +03:00
|
|
|
[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX] = { .type = NLA_U32 },
|
|
|
|
[RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC] = { .type = NLA_U8 },
|
2023-10-09 13:43:58 +03:00
|
|
|
[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE] = { .type = NLA_U8 },
|
2024-04-16 15:03:50 +03:00
|
|
|
[RDMA_NLDEV_ATTR_DRIVER_DETAILS] = { .type = NLA_U8 },
|
2024-06-16 19:08:40 +03:00
|
|
|
[RDMA_NLDEV_ATTR_DEV_TYPE] = { .type = NLA_U8 },
|
2024-06-16 19:08:41 +03:00
|
|
|
[RDMA_NLDEV_ATTR_PARENT_NAME] = { .type = NLA_NUL_STRING },
|
2024-07-01 15:40:48 +03:00
|
|
|
[RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE] = { .type = NLA_U8 },
|
2024-09-09 20:30:24 +03:00
|
|
|
[RDMA_NLDEV_ATTR_EVENT_TYPE] = { .type = NLA_U8 },
|
2025-03-13 16:18:43 +02:00
|
|
|
[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED] = { .type = NLA_U8 },
|
2017-06-20 09:59:14 +03:00
|
|
|
};
|
|
|
|
|
2018-05-03 08:41:42 -07:00
|
|
|
static int put_driver_name_print_type(struct sk_buff *msg, const char *name,
|
|
|
|
enum rdma_nldev_print_type print_type)
|
|
|
|
{
|
|
|
|
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, name))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
if (print_type != RDMA_NLDEV_PRINT_TYPE_UNSPEC &&
|
|
|
|
nla_put_u8(msg, RDMA_NLDEV_ATTR_DRIVER_PRINT_TYPE, print_type))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int _rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name,
|
|
|
|
enum rdma_nldev_print_type print_type,
|
|
|
|
u32 value)
|
|
|
|
{
|
|
|
|
if (put_driver_name_print_type(msg, name, print_type))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DRIVER_U32, value))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int _rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name,
|
|
|
|
enum rdma_nldev_print_type print_type,
|
|
|
|
u64 value)
|
|
|
|
{
|
|
|
|
if (put_driver_name_print_type(msg, name, print_type))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_DRIVER_U64, value,
|
|
|
|
RDMA_NLDEV_ATTR_PAD))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-10-16 09:23:07 +03:00
|
|
|
int rdma_nl_put_driver_string(struct sk_buff *msg, const char *name,
|
|
|
|
const char *str)
|
|
|
|
{
|
|
|
|
if (put_driver_name_print_type(msg, name,
|
|
|
|
RDMA_NLDEV_PRINT_TYPE_UNSPEC))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DRIVER_STRING, str))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(rdma_nl_put_driver_string);
|
|
|
|
|
2018-05-03 08:41:42 -07:00
|
|
|
/*
 * Emit a named u32 driver attribute with no print-type hint.
 * Returns whatever _rdma_nl_put_driver_u32() returns.
 */
int rdma_nl_put_driver_u32(struct sk_buff *msg, const char *name, u32 value)
{
	const enum rdma_nldev_print_type type = RDMA_NLDEV_PRINT_TYPE_UNSPEC;

	return _rdma_nl_put_driver_u32(msg, name, type, value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u32);
|
|
|
|
|
|
|
|
int rdma_nl_put_driver_u32_hex(struct sk_buff *msg, const char *name,
|
|
|
|
u32 value)
|
|
|
|
{
|
|
|
|
return _rdma_nl_put_driver_u32(msg, name, RDMA_NLDEV_PRINT_TYPE_HEX,
|
|
|
|
value);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(rdma_nl_put_driver_u32_hex);
|
|
|
|
|
|
|
|
/*
 * Emit a named u64 driver attribute with no print-type hint.
 * Returns whatever _rdma_nl_put_driver_u64() returns.
 */
int rdma_nl_put_driver_u64(struct sk_buff *msg, const char *name, u64 value)
{
	const enum rdma_nldev_print_type type = RDMA_NLDEV_PRINT_TYPE_UNSPEC;

	return _rdma_nl_put_driver_u64(msg, name, type, value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64);
|
|
|
|
|
|
|
|
/*
 * Emit a named u64 driver attribute tagged with the hexadecimal print type.
 * Returns whatever _rdma_nl_put_driver_u64() returns.
 */
int rdma_nl_put_driver_u64_hex(struct sk_buff *msg, const char *name, u64 value)
{
	const enum rdma_nldev_print_type type = RDMA_NLDEV_PRINT_TYPE_HEX;

	return _rdma_nl_put_driver_u64(msg, name, type, value);
}
EXPORT_SYMBOL(rdma_nl_put_driver_u64_hex);
|
|
|
|
|
2023-10-09 13:43:58 +03:00
|
|
|
bool rdma_nl_get_privileged_qkey(void)
|
|
|
|
{
|
2025-06-26 21:58:11 +03:00
|
|
|
return privileged_qkey;
|
2023-10-09 13:43:58 +03:00
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(rdma_nl_get_privileged_qkey);
|
|
|
|
|
2018-01-01 13:07:14 +02:00
|
|
|
static int fill_nldev_handle(struct sk_buff *msg, struct ib_device *device)
|
2017-06-20 09:59:14 +03:00
|
|
|
{
|
|
|
|
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index))
|
|
|
|
return -EMSGSIZE;
|
2018-09-20 16:42:25 -06:00
|
|
|
if (nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
|
|
|
|
dev_name(&device->dev)))
|
2017-06-20 09:59:14 +03:00
|
|
|
return -EMSGSIZE;
|
2018-01-01 13:07:14 +02:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int fill_dev_info(struct sk_buff *msg, struct ib_device *device)
|
|
|
|
{
|
|
|
|
char fw[IB_FW_VERSION_NAME_MAX];
|
2019-04-02 21:50:34 +03:00
|
|
|
int ret = 0;
|
2021-03-01 09:04:20 +02:00
|
|
|
u32 port;
|
2018-01-01 13:07:14 +02:00
|
|
|
|
|
|
|
if (fill_nldev_handle(msg, device))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2017-06-20 09:59:14 +03:00
|
|
|
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, rdma_end_port(device)))
|
|
|
|
return -EMSGSIZE;
|
2017-06-20 14:47:08 +03:00
|
|
|
|
|
|
|
BUILD_BUG_ON(sizeof(device->attrs.device_cap_flags) != sizeof(u64));
|
|
|
|
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
|
2018-05-03 08:40:49 -07:00
|
|
|
device->attrs.device_cap_flags,
|
|
|
|
RDMA_NLDEV_ATTR_PAD))
|
2017-06-20 14:47:08 +03:00
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2017-06-27 16:58:59 +03:00
|
|
|
ib_get_device_fw_str(device, fw);
|
2018-03-27 20:40:49 +03:00
|
|
|
/* Device without FW has strlen(fw) = 0 */
|
2017-06-27 16:58:59 +03:00
|
|
|
if (strlen(fw) && nla_put_string(msg, RDMA_NLDEV_ATTR_FW_VERSION, fw))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2017-06-28 14:01:37 +03:00
|
|
|
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_NODE_GUID,
|
2018-05-03 08:40:49 -07:00
|
|
|
be64_to_cpu(device->node_guid),
|
|
|
|
RDMA_NLDEV_ATTR_PAD))
|
2017-06-28 14:01:37 +03:00
|
|
|
return -EMSGSIZE;
|
|
|
|
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SYS_IMAGE_GUID,
|
2018-05-03 08:40:49 -07:00
|
|
|
be64_to_cpu(device->attrs.sys_image_guid),
|
|
|
|
RDMA_NLDEV_ATTR_PAD))
|
2017-06-28 14:01:37 +03:00
|
|
|
return -EMSGSIZE;
|
2017-06-29 16:01:29 +03:00
|
|
|
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_NODE_TYPE, device->node_type))
|
|
|
|
return -EMSGSIZE;
|
2019-07-08 13:59:04 +03:00
|
|
|
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, device->use_cq_dim))
|
|
|
|
return -EMSGSIZE;
|
2019-04-02 21:50:34 +03:00
|
|
|
|
2024-06-16 19:08:41 +03:00
|
|
|
if (device->type &&
|
|
|
|
nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_TYPE, device->type))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
if (device->parent &&
|
|
|
|
nla_put_string(msg, RDMA_NLDEV_ATTR_PARENT_NAME,
|
|
|
|
dev_name(&device->parent->dev)))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2024-07-01 15:40:48 +03:00
|
|
|
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_NAME_ASSIGN_TYPE,
|
|
|
|
device->name_assign_type))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2019-04-02 21:50:34 +03:00
|
|
|
/*
|
|
|
|
* Link type is determined on first port and mlx4 device
|
|
|
|
* which can potentially have two different link type for the same
|
|
|
|
* IB device is considered as better to be avoided in the future,
|
|
|
|
*/
|
|
|
|
port = rdma_start_port(device);
|
|
|
|
if (rdma_cap_opa_mad(device, port))
|
|
|
|
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "opa");
|
|
|
|
else if (rdma_protocol_ib(device, port))
|
|
|
|
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "ib");
|
|
|
|
else if (rdma_protocol_iwarp(device, port))
|
|
|
|
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "iw");
|
|
|
|
else if (rdma_protocol_roce(device, port))
|
|
|
|
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL, "roce");
|
|
|
|
else if (rdma_protocol_usnic(device, port))
|
|
|
|
ret = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_PROTOCOL,
|
|
|
|
"usnic");
|
|
|
|
return ret;
|
2017-06-20 09:59:14 +03:00
|
|
|
}
|
|
|
|
|
2017-06-20 11:30:33 +03:00
|
|
|
/*
 * Fill @msg with the description of @port on @device: device handle, port
 * index, IB-only attributes (capability flags, subnet prefix, LIDs, LMC),
 * port state and physical state, and, when the port's netdev exists and
 * lives in @net, the netdev index and name.
 *
 * Returns 0 on success, -EMSGSIZE when an attribute could not be added, or
 * the error reported by ib_query_port() / the netdev nla_put_*() calls.
 */
static int fill_port_info(struct sk_buff *msg,
			  struct ib_device *device, u32 port,
			  const struct net *net)
{
	struct ib_port_attr attr;
	struct net_device *netdev;
	int ret;

	if (fill_nldev_handle(msg, device))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
		return -EMSGSIZE;

	ret = ib_query_port(device, port, &attr);
	if (ret)
		return ret;

	if (rdma_protocol_ib(device, port)) {
		u64 cap_flags;

		/* Both capability words must fit into one u64 attribute. */
		BUILD_BUG_ON((sizeof(attr.port_cap_flags) +
			      sizeof(attr.port_cap_flags2)) > sizeof(u64));
		cap_flags = attr.port_cap_flags |
			    ((u64)attr.port_cap_flags2 << 32);

		if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CAP_FLAGS,
				      cap_flags, RDMA_NLDEV_ATTR_PAD) ||
		    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_SUBNET_PREFIX,
				      attr.subnet_prefix,
				      RDMA_NLDEV_ATTR_PAD) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_LID, attr.lid) ||
		    nla_put_u32(msg, RDMA_NLDEV_ATTR_SM_LID, attr.sm_lid) ||
		    nla_put_u8(msg, RDMA_NLDEV_ATTR_LMC, attr.lmc))
			return -EMSGSIZE;
	}

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_STATE, attr.state) ||
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_PORT_PHYS_STATE, attr.phys_state))
		return -EMSGSIZE;

	/* ret is 0 here; it only changes if a netdev attribute fails. */
	netdev = ib_device_get_netdev(device, port);
	if (netdev && net_eq(dev_net(netdev), net)) {
		ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX,
				  netdev->ifindex);
		if (!ret)
			ret = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME,
					     netdev->name);
	}

	/* Called unconditionally, including when netdev is NULL. */
	dev_put(netdev);

	return ret;
}
|
|
|
|
|
2018-01-28 11:17:24 +02:00
|
|
|
/*
 * Add one RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY nest to @msg holding the
 * resource @name and its current count @curr.
 *
 * Returns 0 on success. On failure the partially built nest is cancelled
 * and -EMSGSIZE is returned.
 */
static int fill_res_info_entry(struct sk_buff *msg,
			       const char *name, u64 curr)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY);
	if (!nest)
		return -EMSGSIZE;

	if (nla_put_string(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_NAME, name) ||
	    nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_SUMMARY_ENTRY_CURR,
			      curr, RDMA_NLDEV_ATTR_PAD)) {
		nla_nest_cancel(msg, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, nest);

	return 0;
}
|
|
|
|
|
2024-04-16 15:03:50 +03:00
|
|
|
static int fill_res_info(struct sk_buff *msg, struct ib_device *device,
|
|
|
|
bool show_details)
|
2018-01-28 11:17:24 +02:00
|
|
|
{
|
|
|
|
static const char * const names[RDMA_RESTRACK_MAX] = {
|
|
|
|
[RDMA_RESTRACK_PD] = "pd",
|
|
|
|
[RDMA_RESTRACK_CQ] = "cq",
|
|
|
|
[RDMA_RESTRACK_QP] = "qp",
|
2018-03-01 13:57:44 -08:00
|
|
|
[RDMA_RESTRACK_CM_ID] = "cm_id",
|
2018-03-01 13:58:13 -08:00
|
|
|
[RDMA_RESTRACK_MR] = "mr",
|
2018-11-28 13:16:45 +02:00
|
|
|
[RDMA_RESTRACK_CTX] = "ctx",
|
2021-04-18 16:41:25 +03:00
|
|
|
[RDMA_RESTRACK_SRQ] = "srq",
|
2018-01-28 11:17:24 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
struct nlattr *table_attr;
|
|
|
|
int ret, i, curr;
|
|
|
|
|
|
|
|
if (fill_nldev_handle(msg, device))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2019-04-26 11:13:06 +02:00
|
|
|
table_attr = nla_nest_start_noflag(msg, RDMA_NLDEV_ATTR_RES_SUMMARY);
|
2018-01-28 11:17:24 +02:00
|
|
|
if (!table_attr)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
for (i = 0; i < RDMA_RESTRACK_MAX; i++) {
|
|
|
|
if (!names[i])
|
|
|
|
continue;
|
2024-04-16 15:03:50 +03:00
|
|
|
curr = rdma_restrack_count(device, i, show_details);
|
2018-01-28 11:17:24 +02:00
|
|
|
ret = fill_res_info_entry(msg, names[i], curr);
|
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
nla_nest_end(msg, table_attr);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err:
|
|
|
|
nla_nest_cancel(msg, table_attr);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-03-01 13:57:44 -08:00
|
|
|
static int fill_res_name_pid(struct sk_buff *msg,
|
|
|
|
struct rdma_restrack_entry *res)
|
|
|
|
{
|
2019-10-10 10:11:05 +03:00
|
|
|
int err = 0;
|
|
|
|
|
2018-03-01 13:57:44 -08:00
|
|
|
/*
|
|
|
|
* For user resources, user is should read /proc/PID/comm to get the
|
|
|
|
* name of the task file.
|
|
|
|
*/
|
|
|
|
if (rdma_is_kernel_res(res)) {
|
2019-10-10 10:11:05 +03:00
|
|
|
err = nla_put_string(msg, RDMA_NLDEV_ATTR_RES_KERN_NAME,
|
|
|
|
res->kern_name);
|
2018-03-01 13:57:44 -08:00
|
|
|
} else {
|
2019-10-10 10:11:05 +03:00
|
|
|
pid_t pid;
|
|
|
|
|
|
|
|
pid = task_pid_vnr(res->task);
|
|
|
|
/*
|
|
|
|
* Task is dead and in zombie state.
|
|
|
|
* There is no need to print PID anymore.
|
|
|
|
*/
|
|
|
|
if (pid)
|
|
|
|
/*
|
|
|
|
* This part is racy, task can be killed and PID will
|
|
|
|
* be zero right here but it is ok, next query won't
|
|
|
|
* return PID. We don't promise real-time reflection
|
|
|
|
* of SW objects.
|
|
|
|
*/
|
|
|
|
err = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PID, pid);
|
2018-03-01 13:57:44 -08:00
|
|
|
}
|
2019-10-10 10:11:05 +03:00
|
|
|
|
|
|
|
return err ? -EMSGSIZE : 0;
|
2018-03-01 13:57:44 -08:00
|
|
|
}
|
|
|
|
|
RDMA: Add support to dump resource tracker in RAW format
Add support to get resource dump in raw format. It enable drivers to
return the entire device specific QP/CQ/MR context without a need from the
driver to set each field separately.
The raw query returns only the device specific data, general data is still
returned by using the existing queries.
Example:
$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]
Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2020-06-23 14:30:40 +03:00
|
|
|
static int fill_res_qp_entry_query(struct sk_buff *msg,
|
|
|
|
struct rdma_restrack_entry *res,
|
|
|
|
struct ib_device *dev,
|
|
|
|
struct ib_qp *qp)
|
2018-01-28 11:17:25 +02:00
|
|
|
{
|
|
|
|
struct ib_qp_init_attr qp_init_attr;
|
|
|
|
struct ib_qp_attr qp_attr;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = ib_query_qp(qp, &qp_attr, 0, &qp_init_attr);
|
|
|
|
if (ret)
|
|
|
|
return ret;
|
|
|
|
|
|
|
|
if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC) {
|
|
|
|
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQPN,
|
|
|
|
qp_attr.dest_qp_num))
|
|
|
|
goto err;
|
|
|
|
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RQ_PSN,
|
|
|
|
qp_attr.rq_psn))
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SQ_PSN, qp_attr.sq_psn))
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
if (qp->qp_type == IB_QPT_RC || qp->qp_type == IB_QPT_UC ||
|
|
|
|
qp->qp_type == IB_QPT_XRC_INI || qp->qp_type == IB_QPT_XRC_TGT) {
|
|
|
|
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_PATH_MIG_STATE,
|
|
|
|
qp_attr.path_mig_state))
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, qp->qp_type))
|
|
|
|
goto err;
|
|
|
|
if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, qp_attr.qp_state))
|
|
|
|
goto err;
|
|
|
|
|
2020-06-23 14:30:38 +03:00
|
|
|
if (dev->ops.fill_res_qp_entry)
|
|
|
|
return dev->ops.fill_res_qp_entry(msg, qp);
|
2018-03-01 13:57:44 -08:00
|
|
|
return 0;
|
|
|
|
|
2019-02-18 22:25:45 +02:00
|
|
|
err: return -EMSGSIZE;
|
2018-03-01 13:57:44 -08:00
|
|
|
}
|
|
|
|
|
RDMA: Add support to dump resource tracker in RAW format
Add support to get resource dump in raw format. It enable drivers to
return the entire device specific QP/CQ/MR context without a need from the
driver to set each field separately.
The raw query returns only the device specific data, general data is still
returned by using the existing queries.
Example:
$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]
Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2020-06-23 14:30:40 +03:00
|
|
|
/*
 * Fill @msg with the description of the QP embedding @res: port index (when
 * set), local QPN, PD number (user resources only), owner name/PID, and the
 * attributes added by fill_res_qp_entry_query().
 *
 * A non-zero @port acts as a filter: QPs on another port yield -EAGAIN.
 *
 * Returns -EAGAIN on a port mismatch, -EMSGSIZE if an attribute could not
 * be added, otherwise the result of fill_res_qp_entry_query().
 */
static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;

	if (port && port != qp->port)
		return -EAGAIN;

	/* In create_qp() port is not set yet */
	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num))
		return -EMSGSIZE;

	/* The PD number is only reported for user-owned QPs. */
	if (!rdma_is_kernel_res(res) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, qp->pd->res.id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return fill_res_qp_entry_query(msg, res, dev, qp);
}
|
|
|
|
|
|
|
|
/*
 * Fill @msg with the driver's raw representation of the QP embedding @res
 * by delegating to ops.fill_res_qp_entry_raw.
 *
 * Returns -EAGAIN when a non-zero @port does not match the QP's port,
 * -EINVAL when the driver has no raw callback, otherwise the callback's
 * return value.
 */
static int fill_res_qp_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_qp *qp = container_of(res, struct ib_qp, res);
	struct ib_device *dev = qp->device;

	if (port && port != qp->port)
		return -EAGAIN;

	if (dev->ops.fill_res_qp_entry_raw)
		return dev->ops.fill_res_qp_entry_raw(msg, qp);

	return -EINVAL;
}
|
|
|
|
|
2019-01-30 12:48:55 +02:00
|
|
|
/*
 * Fill @msg with the description of the CM ID embedding @res: port index
 * (when set), local QPN and QP type (when a QP number is set), port space,
 * state, source/destination addresses (when their family is set), the CM ID
 * number and the owner name/PID. If the driver provides
 * ops.fill_res_cm_id_entry, it is called last and its result is returned.
 *
 * A non-zero @port acts as a filter: CM IDs on another port yield -EAGAIN.
 *
 * Returns 0 on success, -EAGAIN on a port mismatch, -EMSGSIZE if an
 * attribute could not be added, or the driver callback's return value.
 */
static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
				struct rdma_restrack_entry *res, uint32_t port)
{
	struct rdma_id_private *id_priv =
				container_of(res, struct rdma_id_private, res);
	struct rdma_cm_id *cm_id = &id_priv->id;
	struct ib_device *dev = cm_id->device;

	if (port && port != cm_id->port_num)
		return -EAGAIN;

	if (cm_id->port_num &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
		return -EMSGSIZE;

	/* QP number and type are only reported once a QP number is set. */
	if (id_priv->qp_num &&
	    (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, id_priv->qp_num) ||
	     nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, cm_id->qp_type)))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PS, cm_id->ps))
		return -EMSGSIZE;

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_STATE, id_priv->state))
		return -EMSGSIZE;

	/* Addresses are skipped while their address family is still zero. */
	if (cm_id->route.addr.src_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_SRC_ADDR,
		    sizeof(cm_id->route.addr.src_addr),
		    &cm_id->route.addr.src_addr))
		return -EMSGSIZE;

	if (cm_id->route.addr.dst_addr.ss_family &&
	    nla_put(msg, RDMA_NLDEV_ATTR_RES_DST_ADDR,
		    sizeof(cm_id->route.addr.dst_addr),
		    &cm_id->route.addr.dst_addr))
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CM_IDN, res->id))
		return -EMSGSIZE;

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	/* Let the driver append its own attributes, if it wants to. */
	if (!dev->ops.fill_res_cm_id_entry)
		return 0;

	return dev->ops.fill_res_cm_id_entry(msg, cm_id);
}
|
|
|
|
|
2019-01-30 12:48:55 +02:00
|
|
|
/*
 * Describe one CQ in @msg.
 *
 * @msg:               netlink message being filled
 * @has_cap_net_admin: unused here
 * @res:               restrack entry embedded in the ib_cq
 * @port:              unused here
 *
 * Emits, in order: CQE count, use count, poll context (kernel CQs
 * only), whether DIM is attached, CQ number, owning context number
 * (non-kernel CQs only) and the owner name/pid, then gives the driver's
 * fill_res_cq_entry op a chance to add more.
 *
 * Returns -EMSGSIZE if any attribute could not be added, the driver
 * callback's result when one is present, and 0 otherwise.
 */
static int fill_res_cq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQE, cq->cqe))
		goto msg_full;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&cq->usecnt), RDMA_NLDEV_ATTR_PAD))
		goto msg_full;

	/* Poll context is only valid for kernel CQs */
	if (rdma_is_kernel_res(res)) {
		if (nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_POLL_CTX,
			       cq->poll_ctx))
			goto msg_full;
	}

	if (nla_put_u8(msg, RDMA_NLDEV_ATTR_DEV_DIM, (cq->dim != NULL)))
		goto msg_full;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, res->id))
		goto msg_full;

	/* The uobject chain is only dereferenced for non-kernel CQs. */
	if (!rdma_is_kernel_res(res)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
				cq->uobject->uevent.uobject.context->res.id))
			goto msg_full;
	}

	if (fill_res_name_pid(msg, res))
		goto msg_full;

	if (dev->ops.fill_res_cq_entry)
		return dev->ops.fill_res_cq_entry(msg, cq);

	return 0;

msg_full:
	return -EMSGSIZE;
}
|
2018-03-01 13:57:51 -08:00
|
|
|
|
RDMA: Add support to dump resource tracker in RAW format
Add support to get resource dump in raw format. It enables drivers to
return the entire device specific QP/CQ/MR context without a need from the
driver to set each field separately.
The raw query returns only the device specific data, general data is still
returned by using the existing queries.
Example:
$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]
Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2020-06-23 14:30:40 +03:00
|
|
|
/*
 * Hand a CQ over to the driver's raw-dump callback.
 *
 * @msg:               netlink message passed through to the driver
 * @has_cap_net_admin: unused here
 * @res:               restrack entry embedded in the ib_cq
 * @port:              unused here
 *
 * Returns -EINVAL when the device has no fill_res_cq_entry_raw op,
 * otherwise whatever the driver callback returns.
 */
static int fill_res_cq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_cq *cq = container_of(res, struct ib_cq, res);
	struct ib_device *dev = cq->device;

	return dev->ops.fill_res_cq_entry_raw ?
		dev->ops.fill_res_cq_entry_raw(msg, cq) : -EINVAL;
}
|
|
|
|
|
2019-01-30 12:48:55 +02:00
|
|
|
/*
 * Describe one MR in @msg.
 *
 * @msg:               netlink message being filled
 * @has_cap_net_admin: when true, the rkey and lkey are included
 * @res:               restrack entry embedded in the ib_mr
 * @port:              unused here
 *
 * Emits, in order: rkey and lkey (only with @has_cap_net_admin), MR
 * length, MR number, PD number (non-kernel MRs only) and the owner
 * name/pid, then lets the driver's fill_res_mr_entry op add more.
 *
 * Returns -EMSGSIZE if any attribute could not be added, the driver
 * callback's result when one is present, and 0 otherwise.
 */
static int fill_res_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	/* Keys are only emitted when the caller passed has_cap_net_admin. */
	if (has_cap_net_admin &&
	    (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_RKEY, mr->rkey) ||
	     nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LKEY, mr->lkey)))
		return -EMSGSIZE;

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_MRLEN, mr->length,
			      RDMA_NLDEV_ATTR_PAD) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		return -EMSGSIZE;

	if (!rdma_is_kernel_res(res)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, mr->pd->res.id))
			return -EMSGSIZE;
	}

	if (fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	if (dev->ops.fill_res_mr_entry)
		return dev->ops.fill_res_mr_entry(msg, mr);

	return 0;
}
|
2018-03-01 13:58:13 -08:00
|
|
|
|
RDMA: Add support to dump resource tracker in RAW format
Add support to get resource dump in raw format. It enables drivers to
return the entire device specific QP/CQ/MR context without a need from the
driver to set each field separately.
The raw query returns only the device specific data, general data is still
returned by using the existing queries.
Example:
$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]
Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2020-06-23 14:30:40 +03:00
|
|
|
/*
 * Hand an MR over to the driver's raw-dump callback.
 *
 * @msg:               netlink message passed through to the driver
 * @has_cap_net_admin: unused here
 * @res:               restrack entry embedded in the ib_mr
 * @port:              unused here
 *
 * The device is reached through the MR's PD.  Returns -EINVAL when the
 * device has no fill_res_mr_entry_raw op, otherwise whatever the
 * driver callback returns.
 */
static int fill_res_mr_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				 struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	return dev->ops.fill_res_mr_entry_raw ?
		dev->ops.fill_res_mr_entry_raw(msg, mr) : -EINVAL;
}
|
|
|
|
|
2019-01-30 12:48:55 +02:00
|
|
|
/*
 * Describe one PD in @msg.
 *
 * @msg:               netlink message being filled
 * @has_cap_net_admin: when true, the local DMA lkey and (if the PD has
 *                     IB_PD_UNSAFE_GLOBAL_RKEY set) the unsafe global
 *                     rkey are included
 * @res:               restrack entry embedded in the ib_pd
 * @port:              unused here
 *
 * After the optional keys, emits the use count, the PD number, the
 * owning context number (non-kernel PDs only) and the owner name/pid.
 *
 * Returns -EMSGSIZE if an attribute could not be added, otherwise the
 * result of fill_res_name_pid().
 */
static int fill_res_pd_entry(struct sk_buff *msg, bool has_cap_net_admin,
			     struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_pd *pd = container_of(res, struct ib_pd, res);

	if (has_cap_net_admin) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LOCAL_DMA_LKEY,
				pd->local_dma_lkey))
			return -EMSGSIZE;

		if (pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
			if (nla_put_u32(msg,
					RDMA_NLDEV_ATTR_RES_UNSAFE_GLOBAL_RKEY,
					pd->unsafe_global_rkey))
				return -EMSGSIZE;
		}
	}

	if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_RES_USECNT,
			      atomic_read(&pd->usecnt), RDMA_NLDEV_ATTR_PAD) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, res->id))
		return -EMSGSIZE;

	/* The uobject chain is only dereferenced for non-kernel PDs. */
	if (!rdma_is_kernel_res(res)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN,
				pd->uobject->context->res.id))
			return -EMSGSIZE;
	}

	return fill_res_name_pid(msg, res);
}
|
|
|
|
|
2021-04-18 16:41:23 +03:00
|
|
|
/*
 * Describe one user context in @msg.
 *
 * @msg:               netlink message being filled
 * @has_cap_net_admin: unused here
 * @res:               restrack entry embedded in the ib_ucontext
 * @port:              unused here
 *
 * Kernel-owned entries add nothing and return 0.  Otherwise the context
 * number is emitted (-EMSGSIZE on failure), followed by the owner
 * name/pid, whose result is returned.
 */
static int fill_res_ctx_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_ucontext *ctx = container_of(res, struct ib_ucontext, res);

	if (!rdma_is_kernel_res(res)) {
		if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CTXN, ctx->res.id))
			return -EMSGSIZE;

		return fill_res_name_pid(msg, res);
	}

	return 0;
}
|
|
|
|
|
2021-04-18 16:41:26 +03:00
|
|
|
/*
 * Emit one nested QP entry covering the QP numbers
 * [@min_range, @max_range].
 *
 * @msg:       netlink message being filled
 * @min_range: first QP number of the range; 0 means "nothing to emit"
 * @max_range: last QP number of the range
 *
 * A single-element range is written as one LQPN attribute; anything
 * else is written as a MIN_RANGE/MAX_RANGE pair.
 *
 * Returns 0 on success (including the empty case) and -EMSGSIZE when
 * the nest could not be opened or filled, in which case the nest is
 * cancelled.
 */
static int fill_res_range_qp_entry(struct sk_buff *msg, uint32_t min_range,
				   uint32_t max_range)
{
	struct nlattr *nest;
	int failed;

	if (min_range == 0)
		return 0;

	nest = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);
	if (nest == NULL)
		return -EMSGSIZE;

	if (min_range != max_range)
		failed = nla_put_u32(msg, RDMA_NLDEV_ATTR_MIN_RANGE,
				     min_range) ||
			 nla_put_u32(msg, RDMA_NLDEV_ATTR_MAX_RANGE,
				     max_range);
	else
		failed = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, min_range);

	if (failed) {
		nla_nest_cancel(msg, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, nest);
	return 0;
}
|
|
|
|
|
|
|
|
static int fill_res_srq_qps(struct sk_buff *msg, struct ib_srq *srq)
|
|
|
|
{
|
|
|
|
uint32_t min_range = 0, prev = 0;
|
|
|
|
struct rdma_restrack_entry *res;
|
|
|
|
struct rdma_restrack_root *rt;
|
|
|
|
struct nlattr *table_attr;
|
|
|
|
struct ib_qp *qp = NULL;
|
|
|
|
unsigned long id = 0;
|
|
|
|
|
|
|
|
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
|
|
|
|
if (!table_attr)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
rt = &srq->device->res[RDMA_RESTRACK_QP];
|
|
|
|
xa_lock(&rt->xa);
|
|
|
|
xa_for_each(&rt->xa, id, res) {
|
|
|
|
if (!rdma_restrack_get(res))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
qp = container_of(res, struct ib_qp, res);
|
|
|
|
if (!qp->srq || (qp->srq->res.id != srq->res.id)) {
|
|
|
|
rdma_restrack_put(res);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (qp->qp_num < prev)
|
|
|
|
/* qp_num should be ascending */
|
|
|
|
goto err_loop;
|
|
|
|
|
|
|
|
if (min_range == 0) {
|
|
|
|
min_range = qp->qp_num;
|
|
|
|
} else if (qp->qp_num > (prev + 1)) {
|
|
|
|
if (fill_res_range_qp_entry(msg, min_range, prev))
|
|
|
|
goto err_loop;
|
|
|
|
|
|
|
|
min_range = qp->qp_num;
|
|
|
|
}
|
|
|
|
prev = qp->qp_num;
|
|
|
|
rdma_restrack_put(res);
|
|
|
|
}
|
|
|
|
|
|
|
|
xa_unlock(&rt->xa);
|
|
|
|
|
|
|
|
if (fill_res_range_qp_entry(msg, min_range, prev))
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
nla_nest_end(msg, table_attr);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err_loop:
|
|
|
|
rdma_restrack_put(res);
|
|
|
|
xa_unlock(&rt->xa);
|
|
|
|
err:
|
|
|
|
nla_nest_cancel(msg, table_attr);
|
|
|
|
return -EMSGSIZE;
|
|
|
|
}
|
|
|
|
|
2021-04-18 16:41:25 +03:00
|
|
|
/*
 * Describe one SRQ in @msg.
 *
 * @msg:               netlink message being filled
 * @has_cap_net_admin: unused here
 * @res:               restrack entry embedded in the ib_srq
 * @port:              unused here
 *
 * Emits, in order: SRQ number, SRQ type, PD number, CQ number (only
 * when ib_srq_has_cq() says the type has one), the table of attached
 * QPs and the owner name/pid, then lets the driver's
 * fill_res_srq_entry op add more.
 *
 * Returns -EMSGSIZE if any step fails, the driver callback's result
 * when one is present, and 0 otherwise.
 */
static int fill_res_srq_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_srq *srq = container_of(res, struct ib_srq, res);
	struct ib_device *dev = srq->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_SRQN, srq->res.id) ||
	    nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, srq->srq_type) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_PDN, srq->pd->res.id))
		return -EMSGSIZE;

	/* ext.cq is only read for SRQ types that carry a CQ. */
	if (ib_srq_has_cq(srq->srq_type) &&
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_CQN, srq->ext.cq->res.id))
		return -EMSGSIZE;

	if (fill_res_srq_qps(msg, srq) || fill_res_name_pid(msg, res))
		return -EMSGSIZE;

	return dev->ops.fill_res_srq_entry ?
		dev->ops.fill_res_srq_entry(msg, srq) : 0;
}
|
|
|
|
|
2023-09-18 21:11:09 +08:00
|
|
|
/*
 * Hand an SRQ over to the driver's raw-dump callback.
 *
 * @msg:               netlink message passed through to the driver
 * @has_cap_net_admin: unused here
 * @res:               restrack entry embedded in the ib_srq
 * @port:              unused here
 *
 * Returns -EINVAL when the device has no fill_res_srq_entry_raw op,
 * otherwise whatever the driver callback returns.
 */
static int fill_res_srq_raw_entry(struct sk_buff *msg, bool has_cap_net_admin,
				  struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_srq *srq = container_of(res, struct ib_srq, res);
	struct ib_device *dev = srq->device;

	return dev->ops.fill_res_srq_entry_raw ?
		dev->ops.fill_res_srq_entry_raw(msg, srq) : -EINVAL;
}
|
|
|
|
|
2019-07-02 13:02:40 +03:00
|
|
|
static int fill_stat_counter_mode(struct sk_buff *msg,
|
|
|
|
struct rdma_counter *counter)
|
|
|
|
{
|
|
|
|
struct rdma_counter_mode *m = &counter->mode;
|
|
|
|
|
|
|
|
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, m->mode))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2020-07-02 11:29:31 +03:00
|
|
|
if (m->mode == RDMA_COUNTER_MODE_AUTO) {
|
2019-07-02 13:02:40 +03:00
|
|
|
if ((m->mask & RDMA_COUNTER_MASK_QP_TYPE) &&
|
|
|
|
nla_put_u8(msg, RDMA_NLDEV_ATTR_RES_TYPE, m->param.qp_type))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2020-07-02 11:29:31 +03:00
|
|
|
if ((m->mask & RDMA_COUNTER_MASK_PID) &&
|
|
|
|
fill_res_name_pid(msg, &counter->res))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
}
|
|
|
|
|
2019-07-02 13:02:40 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Emit one nested QP entry holding just the QP number @qpn.
 *
 * Returns 0 on success and -EMSGSIZE when the nest could not be opened
 * or the attribute could not be added; a partially built nest is
 * cancelled.
 */
static int fill_stat_counter_qp_entry(struct sk_buff *msg, u32 qpn)
{
	struct nlattr *nest = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP_ENTRY);

	if (nest == NULL)
		return -EMSGSIZE;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
		nla_nest_cancel(msg, nest);
		return -EMSGSIZE;
	}

	nla_nest_end(msg, nest);
	return 0;
}
|
|
|
|
|
|
|
|
static int fill_stat_counter_qps(struct sk_buff *msg,
|
|
|
|
struct rdma_counter *counter)
|
|
|
|
{
|
|
|
|
struct rdma_restrack_entry *res;
|
|
|
|
struct rdma_restrack_root *rt;
|
|
|
|
struct nlattr *table_attr;
|
|
|
|
struct ib_qp *qp = NULL;
|
|
|
|
unsigned long id = 0;
|
|
|
|
int ret = 0;
|
|
|
|
|
|
|
|
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
|
2022-11-26 04:34:10 +00:00
|
|
|
if (!table_attr)
|
|
|
|
return -EMSGSIZE;
|
2019-07-02 13:02:40 +03:00
|
|
|
|
|
|
|
rt = &counter->device->res[RDMA_RESTRACK_QP];
|
|
|
|
xa_lock(&rt->xa);
|
|
|
|
xa_for_each(&rt->xa, id, res) {
|
|
|
|
qp = container_of(res, struct ib_qp, res);
|
|
|
|
if (!qp->counter || (qp->counter->id != counter->id))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ret = fill_stat_counter_qp_entry(msg, qp->qp_num);
|
|
|
|
if (ret)
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
xa_unlock(&rt->xa);
|
|
|
|
nla_nest_end(msg, table_attr);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err:
|
|
|
|
xa_unlock(&rt->xa);
|
|
|
|
nla_nest_cancel(msg, table_attr);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-10-16 09:23:08 +03:00
|
|
|
int rdma_nl_stat_hwcounter_entry(struct sk_buff *msg, const char *name,
|
|
|
|
u64 value)
|
2019-07-02 13:02:40 +03:00
|
|
|
{
|
|
|
|
struct nlattr *entry_attr;
|
|
|
|
|
|
|
|
entry_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
|
|
|
|
if (!entry_attr)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
if (nla_put_string(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
|
|
|
|
name))
|
|
|
|
goto err;
|
|
|
|
if (nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_VALUE,
|
|
|
|
value, RDMA_NLDEV_ATTR_PAD))
|
|
|
|
goto err;
|
|
|
|
|
|
|
|
nla_nest_end(msg, entry_attr);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err:
|
|
|
|
nla_nest_cancel(msg, entry_attr);
|
|
|
|
return -EMSGSIZE;
|
|
|
|
}
|
2019-10-16 09:23:08 +03:00
|
|
|
EXPORT_SYMBOL(rdma_nl_stat_hwcounter_entry);
|
|
|
|
|
|
|
|
/*
 * Emit the statistics view of one MR.
 *
 * @msg:               netlink message being filled
 * @has_cap_net_admin: unused here
 * @res:               restrack entry embedded in the ib_mr
 * @port:              unused here
 *
 * Only the MR number is written here; everything else is left to the
 * driver's fill_stat_mr_entry op.
 *
 * Returns -EMSGSIZE if the MR number could not be added, the driver
 * callback's result when one is present, and 0 otherwise.
 */
static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin,
			      struct rdma_restrack_entry *res, uint32_t port)
{
	struct ib_mr *mr = container_of(res, struct ib_mr, res);
	struct ib_device *dev = mr->pd->device;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id))
		return -EMSGSIZE;

	return dev->ops.fill_stat_mr_entry ?
		dev->ops.fill_stat_mr_entry(msg, mr) : 0;
}
|
2019-07-02 13:02:40 +03:00
|
|
|
|
|
|
|
static int fill_stat_counter_hwcounters(struct sk_buff *msg,
|
|
|
|
struct rdma_counter *counter)
|
|
|
|
{
|
|
|
|
struct rdma_hw_stats *st = counter->stats;
|
|
|
|
struct nlattr *table_attr;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
|
|
|
|
if (!table_attr)
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
2021-10-08 15:24:31 +03:00
|
|
|
mutex_lock(&st->lock);
|
|
|
|
for (i = 0; i < st->num_counters; i++) {
|
|
|
|
if (test_bit(i, st->is_disabled))
|
|
|
|
continue;
|
2021-10-08 15:24:29 +03:00
|
|
|
if (rdma_nl_stat_hwcounter_entry(msg, st->descs[i].name,
|
|
|
|
st->value[i]))
|
2019-07-02 13:02:40 +03:00
|
|
|
goto err;
|
2021-10-08 15:24:31 +03:00
|
|
|
}
|
|
|
|
mutex_unlock(&st->lock);
|
2019-07-02 13:02:40 +03:00
|
|
|
|
|
|
|
nla_nest_end(msg, table_attr);
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
err:
|
2021-10-08 15:24:31 +03:00
|
|
|
mutex_unlock(&st->lock);
|
2019-07-02 13:02:40 +03:00
|
|
|
nla_nest_cancel(msg, table_attr);
|
|
|
|
return -EMSGSIZE;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int fill_res_counter_entry(struct sk_buff *msg, bool has_cap_net_admin,
|
|
|
|
struct rdma_restrack_entry *res,
|
|
|
|
uint32_t port)
|
|
|
|
{
|
|
|
|
struct rdma_counter *counter =
|
|
|
|
container_of(res, struct rdma_counter, res);
|
|
|
|
|
|
|
|
if (port && port != counter->port)
|
2019-10-20 09:28:00 +03:00
|
|
|
return -EAGAIN;
|
2019-07-02 13:02:40 +03:00
|
|
|
|
|
|
|
/* Dump it even query failed */
|
|
|
|
rdma_counter_query_stats(counter);
|
|
|
|
|
|
|
|
if (nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, counter->port) ||
|
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter->id) ||
|
|
|
|
fill_stat_counter_mode(msg, counter) ||
|
|
|
|
fill_stat_counter_qps(msg, counter) ||
|
|
|
|
fill_stat_counter_hwcounters(msg, counter))
|
|
|
|
return -EMSGSIZE;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2017-06-15 20:33:08 +03:00
|
|
|
static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
struct ib_device *device;
|
|
|
|
struct sk_buff *msg;
|
|
|
|
u32 index;
|
|
|
|
int err;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_deprecated().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This causes backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
not recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, NL_VALIDATE_LIBERAL, extack);
|
2017-06-15 20:33:08 +03:00
|
|
|
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
|
|
|
|
2019-02-26 14:01:47 +02:00
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
2017-06-15 20:33:08 +03:00
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
2018-01-01 13:07:15 +02:00
|
|
|
if (!msg) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
2017-06-15 20:33:08 +03:00
|
|
|
|
|
|
|
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
|
|
|
|
0, 0);
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh) {
|
|
|
|
err = -EMSGSIZE;
|
|
|
|
goto err_free;
|
|
|
|
}
|
2017-06-15 20:33:08 +03:00
|
|
|
|
|
|
|
err = fill_dev_info(msg, device);
|
2018-01-01 13:07:15 +02:00
|
|
|
if (err)
|
|
|
|
goto err_free;
|
2017-06-15 20:33:08 +03:00
|
|
|
|
|
|
|
nlmsg_end(msg, nlh);
|
|
|
|
|
RDMA/core: Sync unregistration with netlink commands
When the rdma device is getting removed, get resource info can race with
device removal, as below:
CPU-0 CPU-1
-------- --------
rdma_nl_rcv_msg()
nldev_res_get_cq_dumpit()
mutex_lock(device_lock);
get device reference
mutex_unlock(device_lock); [..]
ib_unregister_device()
/* Valid reference to
* device->dev exists.
*/
ib_dealloc_device()
[..]
provider->fill_res_entry();
Even though device object is not freed, fill_res_entry() can get called on
device which doesn't have a driver anymore. Kernel core device reference
count is not sufficient, as this only keeps the structure valid, and
doesn't guarantee the driver is still loaded.
Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series. If a
class is net namespace aware, then the below [1] call trace is observed in
above situation.
Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.
[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-11-16 03:50:57 +02:00
|
|
|
ib_device_put(device);
|
2019-07-23 10:02:05 +03:00
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
2018-01-01 13:07:15 +02:00
|
|
|
|
|
|
|
err_free:
|
|
|
|
nlmsg_free(msg);
|
|
|
|
err:
|
RDMA/core: Sync unregistration with netlink commands
When the rdma device is getting removed, get resource info can race with
device removal, as below:
CPU-0 CPU-1
-------- --------
rdma_nl_rcv_msg()
nldev_res_get_cq_dumpit()
mutex_lock(device_lock);
get device reference
mutex_unlock(device_lock); [..]
ib_unregister_device()
/* Valid reference to
* device->dev exists.
*/
ib_dealloc_device()
[..]
provider->fill_res_entry();
Even though device object is not freed, fill_res_entry() can get called on
device which doesn't have a driver anymore. Kernel core device reference
count is not sufficient, as this only keeps the structure valid, and
doesn't guarantee the driver is still loaded.
Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series. If a
class is net namespace aware, then the below [1] call trace is observed in
above situation.
Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.
[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-11-16 03:50:57 +02:00
|
|
|
ib_device_put(device);
|
2018-01-01 13:07:15 +02:00
|
|
|
return err;
|
2017-06-15 20:33:08 +03:00
|
|
|
}
|
|
|
|
|
2018-10-10 09:19:12 +03:00
|
|
|
static int nldev_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
struct ib_device *device;
|
|
|
|
u32 index;
|
|
|
|
int err;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, extack);
|
2018-10-10 09:19:12 +03:00
|
|
|
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
2019-02-26 14:01:47 +02:00
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
2018-10-10 09:19:12 +03:00
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (tb[RDMA_NLDEV_ATTR_DEV_NAME]) {
|
|
|
|
char name[IB_DEVICE_NAME_MAX] = {};
|
|
|
|
|
2020-11-15 18:08:06 +01:00
|
|
|
nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
|
2018-10-10 09:19:12 +03:00
|
|
|
IB_DEVICE_NAME_MAX);
|
2020-03-09 16:05:15 -03:00
|
|
|
if (strlen(name) == 0) {
|
|
|
|
err = -EINVAL;
|
|
|
|
goto done;
|
|
|
|
}
|
2018-10-10 09:19:12 +03:00
|
|
|
err = ib_device_rename(device, name);
|
2019-04-15 13:22:51 +03:00
|
|
|
goto done;
|
2018-10-10 09:19:12 +03:00
|
|
|
}
|
|
|
|
|
2019-04-15 13:22:51 +03:00
|
|
|
if (tb[RDMA_NLDEV_NET_NS_FD]) {
|
|
|
|
u32 ns_fd;
|
|
|
|
|
|
|
|
ns_fd = nla_get_u32(tb[RDMA_NLDEV_NET_NS_FD]);
|
|
|
|
err = ib_device_set_netns_put(skb, device, ns_fd);
|
|
|
|
goto put_done;
|
|
|
|
}
|
|
|
|
|
2019-07-08 13:59:04 +03:00
|
|
|
if (tb[RDMA_NLDEV_ATTR_DEV_DIM]) {
|
|
|
|
u8 use_dim;
|
|
|
|
|
|
|
|
use_dim = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_DIM]);
|
|
|
|
err = ib_device_set_dim(device, use_dim);
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
|
2019-04-15 13:22:51 +03:00
|
|
|
done:
|
RDMA/core: Sync unregistration with netlink commands
When the rdma device is getting removed, get resource info can race with
device removal, as below:
CPU-0 CPU-1
-------- --------
rdma_nl_rcv_msg()
nldev_res_get_cq_dumpit()
mutex_lock(device_lock);
get device reference
mutex_unlock(device_lock); [..]
ib_unregister_device()
/* Valid reference to
* device->dev exists.
*/
ib_dealloc_device()
[..]
provider->fill_res_entry();
Even though device object is not freed, fill_res_entry() can get called on
device which doesn't have a driver anymore. Kernel core device reference
count is not sufficient, as this only keeps the structure valid, and
doesn't guarantee the driver is still loaded.
Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series. If a
class is net namespace aware, then the below [1] call trace is observed in
above situation.
Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.
[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-11-16 03:50:57 +02:00
|
|
|
ib_device_put(device);
|
2019-04-15 13:22:51 +03:00
|
|
|
put_done:
|
2018-10-10 09:19:12 +03:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2017-06-20 09:59:14 +03:00
|
|
|
static int _nldev_get_dumpit(struct ib_device *device,
|
|
|
|
struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb,
|
|
|
|
unsigned int idx)
|
|
|
|
{
|
|
|
|
int start = cb->args[0];
|
|
|
|
struct nlmsghdr *nlh;
|
|
|
|
|
|
|
|
if (idx < start)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
|
|
|
|
0, NLM_F_MULTI);
|
|
|
|
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh || fill_dev_info(skb, device)) {
|
2017-06-20 09:59:14 +03:00
|
|
|
nlmsg_cancel(skb, nlh);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
nlmsg_end(skb, nlh);
|
|
|
|
|
|
|
|
idx++;
|
|
|
|
|
|
|
|
out: cb->args[0] = idx;
|
|
|
|
return skb->len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nldev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
|
|
|
|
{
|
|
|
|
/*
|
|
|
|
* There is no need to take lock, because
|
2019-02-26 14:01:47 +02:00
|
|
|
* we are relying on ib_core's locking.
|
2017-06-20 09:59:14 +03:00
|
|
|
*/
|
|
|
|
return ib_enum_all_devs(_nldev_get_dumpit, skb, cb);
|
|
|
|
}
|
|
|
|
|
2017-06-22 16:10:38 +03:00
|
|
|
static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
struct ib_device *device;
|
|
|
|
struct sk_buff *msg;
|
|
|
|
u32 index;
|
|
|
|
u32 port;
|
|
|
|
int err;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, NL_VALIDATE_LIBERAL, extack);
|
2017-10-31 20:09:54 +02:00
|
|
|
if (err ||
|
|
|
|
!tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
|
|
|
|
!tb[RDMA_NLDEV_ATTR_PORT_INDEX])
|
2017-06-22 16:10:38 +03:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
2019-02-26 14:01:47 +02:00
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
2017-06-22 16:10:38 +03:00
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
|
2018-01-01 13:07:15 +02:00
|
|
|
if (!rdma_is_port_valid(device, port)) {
|
|
|
|
err = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
2017-06-22 16:10:38 +03:00
|
|
|
|
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
2018-01-01 13:07:15 +02:00
|
|
|
if (!msg) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
2017-06-22 16:10:38 +03:00
|
|
|
|
|
|
|
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
|
|
|
|
0, 0);
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh) {
|
|
|
|
err = -EMSGSIZE;
|
|
|
|
goto err_free;
|
|
|
|
}
|
2017-06-22 16:10:38 +03:00
|
|
|
|
2018-03-27 20:40:49 +03:00
|
|
|
err = fill_port_info(msg, device, port, sock_net(skb->sk));
|
2018-01-01 13:07:15 +02:00
|
|
|
if (err)
|
|
|
|
goto err_free;
|
2017-06-22 16:10:38 +03:00
|
|
|
|
|
|
|
nlmsg_end(msg, nlh);
|
RDMA/core: Sync unregistration with netlink commands
When the rdma device is getting removed, get resource info can race with
device removal, as below:
CPU-0 CPU-1
-------- --------
rdma_nl_rcv_msg()
nldev_res_get_cq_dumpit()
mutex_lock(device_lock);
get device reference
mutex_unlock(device_lock); [..]
ib_unregister_device()
/* Valid reference to
* device->dev exists.
*/
ib_dealloc_device()
[..]
provider->fill_res_entry();
Even though device object is not freed, fill_res_entry() can get called on
device which doesn't have a driver anymore. Kernel core device reference
count is not sufficient, as this only keeps the structure valid, and
doesn't guarantee the driver is still loaded.
Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series. If a
class is net namespace aware, then the below [1] call trace is observed in
above situation.
Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.
[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-11-16 03:50:57 +02:00
|
|
|
ib_device_put(device);
|
2017-06-22 16:10:38 +03:00
|
|
|
|
2019-07-23 10:02:05 +03:00
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
2018-01-01 13:07:15 +02:00
|
|
|
|
|
|
|
err_free:
|
|
|
|
nlmsg_free(msg);
|
|
|
|
err:
|
RDMA/core: Sync unregistration with netlink commands
When the rdma device is getting removed, get resource info can race with
device removal, as below:
CPU-0 CPU-1
-------- --------
rdma_nl_rcv_msg()
nldev_res_get_cq_dumpit()
mutex_lock(device_lock);
get device reference
mutex_unlock(device_lock); [..]
ib_unregister_device()
/* Valid reference to
* device->dev exists.
*/
ib_dealloc_device()
[..]
provider->fill_res_entry();
Even though device object is not freed, fill_res_entry() can get called on
device which doesn't have a driver anymore. Kernel core device reference
count is not sufficient, as this only keeps the structure valid, and
doesn't guarantee the driver is still loaded.
Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series. If a
class is net namespace aware, then the below [1] call trace is observed in
above situation.
Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.
[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-11-16 03:50:57 +02:00
|
|
|
ib_device_put(device);
|
2018-01-01 13:07:15 +02:00
|
|
|
return err;
|
2017-06-22 16:10:38 +03:00
|
|
|
}
|
|
|
|
|
2017-06-20 11:30:33 +03:00
|
|
|
static int nldev_port_get_dumpit(struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
struct ib_device *device;
|
|
|
|
int start = cb->args[0];
|
|
|
|
struct nlmsghdr *nlh;
|
|
|
|
u32 idx = 0;
|
|
|
|
u32 ifindex;
|
|
|
|
int err;
|
2019-02-12 21:12:47 -07:00
|
|
|
unsigned int p;
|
2017-06-20 11:30:33 +03:00
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, NL_VALIDATE_LIBERAL, NULL);
|
2017-06-20 11:30:33 +03:00
|
|
|
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
ifindex = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
2019-02-26 14:01:47 +02:00
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), ifindex);
|
2017-06-20 11:30:33 +03:00
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2019-02-12 21:12:47 -07:00
|
|
|
rdma_for_each_port (device, p) {
|
2017-06-20 11:30:33 +03:00
|
|
|
/*
|
|
|
|
* The dumpit function returns all information from specific
|
|
|
|
* index. This specific index is taken from the netlink
|
|
|
|
* messages request sent by user and it is available
|
|
|
|
* in cb->args[0].
|
|
|
|
*
|
|
|
|
* Usually, the user doesn't fill this field and it causes
|
|
|
|
* to return everything.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
if (idx < start) {
|
|
|
|
idx++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid,
|
|
|
|
cb->nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
|
|
|
|
RDMA_NLDEV_CMD_PORT_GET),
|
|
|
|
0, NLM_F_MULTI);
|
|
|
|
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
|
2017-06-20 11:30:33 +03:00
|
|
|
nlmsg_cancel(skb, nlh);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
idx++;
|
|
|
|
nlmsg_end(skb, nlh);
|
|
|
|
}
|
|
|
|
|
2018-01-01 13:07:15 +02:00
|
|
|
out:
|
RDMA/core: Sync unregistration with netlink commands
When the rdma device is getting removed, get resource info can race with
device removal, as below:
CPU-0 CPU-1
-------- --------
rdma_nl_rcv_msg()
nldev_res_get_cq_dumpit()
mutex_lock(device_lock);
get device reference
mutex_unlock(device_lock); [..]
ib_unregister_device()
/* Valid reference to
* device->dev exists.
*/
ib_dealloc_device()
[..]
provider->fill_res_entry();
Even though device object is not freed, fill_res_entry() can get called on
device which doesn't have a driver anymore. Kernel core device reference
count is not sufficient, as this only keeps the structure valid, and
doesn't guarantee the driver is still loaded.
Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series. If a
class is net namespace aware, then the below [1] call trace is observed in
above situation.
Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.
[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-11-16 03:50:57 +02:00
|
|
|
ib_device_put(device);
|
2018-01-01 13:07:15 +02:00
|
|
|
cb->args[0] = idx;
|
2017-06-20 11:30:33 +03:00
|
|
|
return skb->len;
|
|
|
|
}
|
|
|
|
|
2018-01-28 11:17:24 +02:00
|
|
|
static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
2024-04-16 15:03:50 +03:00
|
|
|
bool show_details = false;
|
2018-01-28 11:17:24 +02:00
|
|
|
struct ib_device *device;
|
|
|
|
struct sk_buff *msg;
|
|
|
|
u32 index;
|
|
|
|
int ret;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_deprecated().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This caused backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
not recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, NL_VALIDATE_LIBERAL, extack);
|
2018-01-28 11:17:24 +02:00
|
|
|
if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
2019-02-26 14:01:47 +02:00
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
2018-01-28 11:17:24 +02:00
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2024-04-16 15:03:50 +03:00
|
|
|
if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
|
|
|
|
show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
|
|
|
|
|
2018-01-28 11:17:24 +02:00
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
2018-02-01 13:01:48 +03:00
|
|
|
if (!msg) {
|
|
|
|
ret = -ENOMEM;
|
2018-01-28 11:17:24 +02:00
|
|
|
goto err;
|
2018-02-01 13:01:48 +03:00
|
|
|
}
|
2018-01-28 11:17:24 +02:00
|
|
|
|
|
|
|
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
|
|
|
|
0, 0);
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh) {
|
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err_free;
|
|
|
|
}
|
2018-01-28 11:17:24 +02:00
|
|
|
|
2024-04-16 15:03:50 +03:00
|
|
|
ret = fill_res_info(msg, device, show_details);
|
2018-01-28 11:17:24 +02:00
|
|
|
if (ret)
|
|
|
|
goto err_free;
|
|
|
|
|
|
|
|
nlmsg_end(msg, nlh);
|
RDMA/core: Sync unregistration with netlink commands
When the rdma device is getting removed, get resource info can race with
device removal, as below:
CPU-0 CPU-1
-------- --------
rdma_nl_rcv_msg()
nldev_res_get_cq_dumpit()
mutex_lock(device_lock);
get device reference
mutex_unlock(device_lock); [..]
ib_unregister_device()
/* Valid reference to
* device->dev exists.
*/
ib_dealloc_device()
[..]
provider->fill_res_entry();
Even though device object is not freed, fill_res_entry() can get called on
device which doesn't have a driver anymore. Kernel core device reference
count is not sufficient, as this only keeps the structure valid, and
doesn't guarantee the driver is still loaded.
Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series. If a
class is net namespace aware, then the below [1] call trace is observed in
above situation.
Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.
[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-11-16 03:50:57 +02:00
|
|
|
ib_device_put(device);
|
2019-07-23 10:02:05 +03:00
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
2018-01-28 11:17:24 +02:00
|
|
|
|
|
|
|
err_free:
|
|
|
|
nlmsg_free(msg);
|
|
|
|
err:
|
RDMA/core: Sync unregistration with netlink commands
When the rdma device is getting removed, get resource info can race with
device removal, as below:
CPU-0 CPU-1
-------- --------
rdma_nl_rcv_msg()
nldev_res_get_cq_dumpit()
mutex_lock(device_lock);
get device reference
mutex_unlock(device_lock); [..]
ib_unregister_device()
/* Valid reference to
* device->dev exists.
*/
ib_dealloc_device()
[..]
provider->fill_res_entry();
Even though device object is not freed, fill_res_entry() can get called on
device which doesn't have a driver anymore. Kernel core device reference
count is not sufficient, as this only keeps the structure valid, and
doesn't guarantee the driver is still loaded.
Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series. If a
class is net namespace aware, then the below [1] call trace is observed in
above situation.
Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.
[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-11-16 03:50:57 +02:00
|
|
|
ib_device_put(device);
|
2018-01-28 11:17:24 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int _nldev_res_get_dumpit(struct ib_device *device,
|
|
|
|
struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb,
|
|
|
|
unsigned int idx)
|
|
|
|
{
|
|
|
|
int start = cb->args[0];
|
|
|
|
struct nlmsghdr *nlh;
|
|
|
|
|
|
|
|
if (idx < start)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
|
|
|
|
0, NLM_F_MULTI);
|
|
|
|
|
2024-04-16 15:03:50 +03:00
|
|
|
if (!nlh || fill_res_info(skb, device, false)) {
|
2018-01-28 11:17:24 +02:00
|
|
|
nlmsg_cancel(skb, nlh);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
nlmsg_end(skb, nlh);
|
|
|
|
|
|
|
|
idx++;
|
|
|
|
|
|
|
|
out:
|
|
|
|
cb->args[0] = idx;
|
|
|
|
return skb->len;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nldev_res_get_dumpit(struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb)
|
|
|
|
{
|
|
|
|
return ib_enum_all_devs(_nldev_res_get_dumpit, skb, cb);
|
|
|
|
}
|
|
|
|
|
2018-03-01 13:57:29 -08:00
|
|
|
struct nldev_fill_res_entry {
|
|
|
|
enum rdma_nldev_attr nldev_attr;
|
2019-02-18 22:25:45 +02:00
|
|
|
u8 flags;
|
|
|
|
u32 entry;
|
|
|
|
u32 id;
|
|
|
|
};
|
|
|
|
|
|
|
|
/* Flag bits stored in nldev_fill_res_entry.flags. */
enum nldev_res_flags {
	/*
	 * Resource is looked up per device: the doit path rejects a request
	 * carrying a port index for such types, and rejects a request
	 * without one for types that lack this flag.
	 */
	NLDEV_PER_DEV = 1 << 0,
};
|
|
|
|
|
|
|
|
static const struct nldev_fill_res_entry fill_entries[RDMA_RESTRACK_MAX] = {
|
|
|
|
[RDMA_RESTRACK_QP] = {
|
|
|
|
.nldev_attr = RDMA_NLDEV_ATTR_RES_QP,
|
2019-02-18 22:25:45 +02:00
|
|
|
.entry = RDMA_NLDEV_ATTR_RES_QP_ENTRY,
|
2019-02-18 22:25:51 +02:00
|
|
|
.id = RDMA_NLDEV_ATTR_RES_LQPN,
|
2018-03-01 13:57:29 -08:00
|
|
|
},
|
2018-03-01 13:57:44 -08:00
|
|
|
[RDMA_RESTRACK_CM_ID] = {
|
|
|
|
.nldev_attr = RDMA_NLDEV_ATTR_RES_CM_ID,
|
2019-02-18 22:25:45 +02:00
|
|
|
.entry = RDMA_NLDEV_ATTR_RES_CM_ID_ENTRY,
|
2019-02-18 22:25:49 +02:00
|
|
|
.id = RDMA_NLDEV_ATTR_RES_CM_IDN,
|
2018-03-01 13:57:44 -08:00
|
|
|
},
|
2018-03-01 13:57:51 -08:00
|
|
|
[RDMA_RESTRACK_CQ] = {
|
|
|
|
.nldev_attr = RDMA_NLDEV_ATTR_RES_CQ,
|
2019-02-18 22:25:45 +02:00
|
|
|
.flags = NLDEV_PER_DEV,
|
|
|
|
.entry = RDMA_NLDEV_ATTR_RES_CQ_ENTRY,
|
2019-02-18 22:25:49 +02:00
|
|
|
.id = RDMA_NLDEV_ATTR_RES_CQN,
|
2018-03-01 13:57:51 -08:00
|
|
|
},
|
2018-03-01 13:58:13 -08:00
|
|
|
[RDMA_RESTRACK_MR] = {
|
|
|
|
.nldev_attr = RDMA_NLDEV_ATTR_RES_MR,
|
2019-02-18 22:25:45 +02:00
|
|
|
.flags = NLDEV_PER_DEV,
|
|
|
|
.entry = RDMA_NLDEV_ATTR_RES_MR_ENTRY,
|
2019-02-18 22:25:49 +02:00
|
|
|
.id = RDMA_NLDEV_ATTR_RES_MRN,
|
2018-03-01 13:58:13 -08:00
|
|
|
},
|
2018-03-01 13:58:28 -08:00
|
|
|
[RDMA_RESTRACK_PD] = {
|
|
|
|
.nldev_attr = RDMA_NLDEV_ATTR_RES_PD,
|
2019-02-18 22:25:45 +02:00
|
|
|
.flags = NLDEV_PER_DEV,
|
|
|
|
.entry = RDMA_NLDEV_ATTR_RES_PD_ENTRY,
|
2019-02-18 22:25:49 +02:00
|
|
|
.id = RDMA_NLDEV_ATTR_RES_PDN,
|
2018-03-01 13:58:28 -08:00
|
|
|
},
|
2019-07-02 13:02:40 +03:00
|
|
|
[RDMA_RESTRACK_COUNTER] = {
|
|
|
|
.nldev_attr = RDMA_NLDEV_ATTR_STAT_COUNTER,
|
|
|
|
.entry = RDMA_NLDEV_ATTR_STAT_COUNTER_ENTRY,
|
|
|
|
.id = RDMA_NLDEV_ATTR_STAT_COUNTER_ID,
|
|
|
|
},
|
2021-04-18 16:41:23 +03:00
|
|
|
[RDMA_RESTRACK_CTX] = {
|
|
|
|
.nldev_attr = RDMA_NLDEV_ATTR_RES_CTX,
|
|
|
|
.flags = NLDEV_PER_DEV,
|
|
|
|
.entry = RDMA_NLDEV_ATTR_RES_CTX_ENTRY,
|
|
|
|
.id = RDMA_NLDEV_ATTR_RES_CTXN,
|
|
|
|
},
|
2021-04-18 16:41:25 +03:00
|
|
|
[RDMA_RESTRACK_SRQ] = {
|
|
|
|
.nldev_attr = RDMA_NLDEV_ATTR_RES_SRQ,
|
|
|
|
.flags = NLDEV_PER_DEV,
|
|
|
|
.entry = RDMA_NLDEV_ATTR_RES_SRQ_ENTRY,
|
|
|
|
.id = RDMA_NLDEV_ATTR_RES_SRQN,
|
|
|
|
},
|
|
|
|
|
2018-03-01 13:57:29 -08:00
|
|
|
};
|
|
|
|
|
2025-06-20 13:33:26 +02:00
|
|
|
static noinline_for_stack int
|
|
|
|
res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack,
|
|
|
|
enum rdma_restrack_type res_type,
|
|
|
|
res_fill_func_t fill_func)
|
2019-02-18 22:25:45 +02:00
|
|
|
{
|
|
|
|
const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
struct rdma_restrack_entry *res;
|
|
|
|
struct ib_device *device;
|
|
|
|
u32 index, id, port = 0;
|
|
|
|
bool has_cap_net_admin;
|
|
|
|
struct sk_buff *msg;
|
|
|
|
int ret;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, NL_VALIDATE_LIBERAL, extack);
|
2019-02-18 22:25:45 +02:00
|
|
|
if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !fe->id || !tb[fe->id])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
2019-02-26 14:01:47 +02:00
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
2019-02-18 22:25:45 +02:00
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
|
|
|
|
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
|
|
|
|
if (!rdma_is_port_valid(device, port)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((port && fe->flags & NLDEV_PER_DEV) ||
|
|
|
|
(!port && ~fe->flags & NLDEV_PER_DEV)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
id = nla_get_u32(tb[fe->id]);
|
|
|
|
res = rdma_restrack_get_byid(device, res_type, id);
|
|
|
|
if (IS_ERR(res)) {
|
|
|
|
ret = PTR_ERR(res);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
|
|
|
if (!msg) {
|
|
|
|
ret = -ENOMEM;
|
2019-08-18 11:10:44 +02:00
|
|
|
goto err_get;
|
2019-02-18 22:25:45 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
RDMA: Add support to dump resource tracker in RAW format
Add support to get resource dump in raw format. It enable drivers to
return the entire device specific QP/CQ/MR context without a need from the
driver to set each field separately.
The raw query returns only the device specific data, general data is still
returned by using the existing queries.
Example:
$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]
Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2020-06-23 14:30:40 +03:00
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
|
|
|
|
RDMA_NL_GET_OP(nlh->nlmsg_type)),
|
2019-02-18 22:25:45 +02:00
|
|
|
0, 0);
|
|
|
|
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh || fill_nldev_handle(msg, device)) {
|
2019-02-18 22:25:45 +02:00
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err_free;
|
|
|
|
}
|
|
|
|
|
|
|
|
has_cap_net_admin = netlink_capable(skb, CAP_NET_ADMIN);
|
2019-10-16 09:23:06 +03:00
|
|
|
|
|
|
|
ret = fill_func(msg, has_cap_net_admin, res, port);
|
2019-02-18 22:25:45 +02:00
|
|
|
if (ret)
|
|
|
|
goto err_free;
|
|
|
|
|
2020-05-07 09:29:42 +03:00
|
|
|
rdma_restrack_put(res);
|
2019-02-18 22:25:45 +02:00
|
|
|
nlmsg_end(msg, nlh);
|
|
|
|
ib_device_put(device);
|
2019-07-23 10:02:05 +03:00
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
2019-02-18 22:25:45 +02:00
|
|
|
|
|
|
|
err_free:
|
|
|
|
nlmsg_free(msg);
|
|
|
|
err_get:
|
|
|
|
rdma_restrack_put(res);
|
|
|
|
err:
|
|
|
|
ib_device_put(device);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2018-03-01 13:57:29 -08:00
|
|
|
static int res_get_common_dumpit(struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb,
|
2019-10-16 09:23:06 +03:00
|
|
|
enum rdma_restrack_type res_type,
|
|
|
|
res_fill_func_t fill_func)
|
2018-01-28 11:17:25 +02:00
|
|
|
{
|
2018-03-01 13:57:29 -08:00
|
|
|
const struct nldev_fill_res_entry *fe = &fill_entries[res_type];
|
2018-01-28 11:17:25 +02:00
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
struct rdma_restrack_entry *res;
|
2019-02-18 22:25:48 +02:00
|
|
|
struct rdma_restrack_root *rt;
|
2018-01-28 11:17:25 +02:00
|
|
|
int err, ret = 0, idx = 0;
|
2024-04-16 15:03:50 +03:00
|
|
|
bool show_details = false;
|
2018-01-28 11:17:25 +02:00
|
|
|
struct nlattr *table_attr;
|
2019-02-18 22:25:45 +02:00
|
|
|
struct nlattr *entry_attr;
|
2018-01-28 11:17:25 +02:00
|
|
|
struct ib_device *device;
|
|
|
|
int start = cb->args[0];
|
2019-01-30 12:48:55 +02:00
|
|
|
bool has_cap_net_admin;
|
2018-01-28 11:17:25 +02:00
|
|
|
struct nlmsghdr *nlh;
|
2019-02-18 22:25:43 +02:00
|
|
|
unsigned long id;
|
2018-01-28 11:17:25 +02:00
|
|
|
u32 index, port = 0;
|
2018-03-01 13:57:29 -08:00
|
|
|
bool filled = false;
|
2018-01-28 11:17:25 +02:00
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
err = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, NL_VALIDATE_LIBERAL, NULL);
|
2018-01-28 11:17:25 +02:00
|
|
|
/*
|
2018-03-01 13:57:29 -08:00
|
|
|
* Right now, we are expecting the device index to get res information,
|
2018-01-28 11:17:25 +02:00
|
|
|
* but it is possible to extend this code to return all devices in
|
|
|
|
* one shot by checking the existence of RDMA_NLDEV_ATTR_DEV_INDEX.
|
|
|
|
* if it doesn't exist, we will iterate over all devices.
|
|
|
|
*
|
|
|
|
* But it is not needed for now.
|
|
|
|
*/
|
|
|
|
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
2019-02-26 14:01:47 +02:00
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
2018-01-28 11:17:25 +02:00
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2024-04-16 15:03:50 +03:00
|
|
|
if (tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS])
|
|
|
|
show_details = nla_get_u8(tb[RDMA_NLDEV_ATTR_DRIVER_DETAILS]);
|
|
|
|
|
2018-01-28 11:17:25 +02:00
|
|
|
/*
|
|
|
|
* If no PORT_INDEX is supplied, we will return all QPs from that device
|
|
|
|
*/
|
|
|
|
if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
|
|
|
|
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
|
|
|
|
if (!rdma_is_port_valid(device, port)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err_index;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
|
RDMA: Add support to dump resource tracker in RAW format
Add support to get resource dump in raw format. It enable drivers to
return the entire device specific QP/CQ/MR context without a need from the
driver to set each field separately.
The raw query returns only the device specific data, general data is still
returned by using the existing queries.
Example:
$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]
Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2020-06-23 14:30:40 +03:00
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
|
|
|
|
RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
|
2018-01-28 11:17:25 +02:00
|
|
|
0, NLM_F_MULTI);
|
|
|
|
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh || fill_nldev_handle(skb, device)) {
|
2018-01-28 11:17:25 +02:00
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2019-04-26 11:13:06 +02:00
|
|
|
table_attr = nla_nest_start_noflag(skb, fe->nldev_attr);
|
2018-01-28 11:17:25 +02:00
|
|
|
if (!table_attr) {
|
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2019-01-30 12:48:55 +02:00
|
|
|
has_cap_net_admin = netlink_capable(cb->skb, CAP_NET_ADMIN);
|
|
|
|
|
2019-02-18 22:25:48 +02:00
|
|
|
rt = &device->res[res_type];
|
|
|
|
xa_lock(&rt->xa);
|
2019-02-18 22:25:43 +02:00
|
|
|
/*
|
|
|
|
* FIXME: if the skip ahead is something common this loop should
|
|
|
|
* use xas_for_each & xas_pause to optimize, we can have a lot of
|
|
|
|
* objects.
|
|
|
|
*/
|
2019-02-18 22:25:48 +02:00
|
|
|
xa_for_each(&rt->xa, id, res) {
|
2024-04-16 15:03:50 +03:00
|
|
|
if (xa_get_mark(&rt->xa, res->id, RESTRACK_DD) && !show_details)
|
|
|
|
goto next;
|
|
|
|
|
2019-02-18 22:25:52 +02:00
|
|
|
if (idx < start || !rdma_restrack_get(res))
|
2018-01-28 11:17:25 +02:00
|
|
|
goto next;
|
|
|
|
|
2019-02-18 22:25:48 +02:00
|
|
|
xa_unlock(&rt->xa);
|
|
|
|
|
2018-03-01 13:57:29 -08:00
|
|
|
filled = true;
|
2018-01-28 11:17:25 +02:00
|
|
|
|
2019-04-26 11:13:06 +02:00
|
|
|
entry_attr = nla_nest_start_noflag(skb, fe->entry);
|
2019-02-18 22:25:45 +02:00
|
|
|
if (!entry_attr) {
|
|
|
|
ret = -EMSGSIZE;
|
|
|
|
rdma_restrack_put(res);
|
2019-02-18 22:25:48 +02:00
|
|
|
goto msg_full;
|
2019-02-18 22:25:45 +02:00
|
|
|
}
|
|
|
|
|
2019-10-16 09:23:06 +03:00
|
|
|
ret = fill_func(skb, has_cap_net_admin, res, port);
|
|
|
|
|
2018-01-28 11:17:25 +02:00
|
|
|
rdma_restrack_put(res);
|
|
|
|
|
2019-02-18 22:25:48 +02:00
|
|
|
if (ret) {
|
2019-02-18 22:25:45 +02:00
|
|
|
nla_nest_cancel(skb, entry_attr);
|
2019-02-18 22:25:48 +02:00
|
|
|
if (ret == -EMSGSIZE)
|
|
|
|
goto msg_full;
|
|
|
|
if (ret == -EAGAIN)
|
|
|
|
goto again;
|
2018-01-28 11:17:25 +02:00
|
|
|
goto res_err;
|
2019-02-18 22:25:48 +02:00
|
|
|
}
|
2019-02-18 22:25:45 +02:00
|
|
|
nla_nest_end(skb, entry_attr);
|
2019-02-18 22:25:48 +02:00
|
|
|
again: xa_lock(&rt->xa);
|
2018-01-28 11:17:25 +02:00
|
|
|
next: idx++;
|
|
|
|
}
|
2019-02-18 22:25:48 +02:00
|
|
|
xa_unlock(&rt->xa);
|
2018-01-28 11:17:25 +02:00
|
|
|
|
2019-02-18 22:25:48 +02:00
|
|
|
msg_full:
|
2018-01-28 11:17:25 +02:00
|
|
|
nla_nest_end(skb, table_attr);
|
|
|
|
nlmsg_end(skb, nlh);
|
|
|
|
cb->args[0] = idx;
|
|
|
|
|
|
|
|
/*
|
2018-03-01 13:57:29 -08:00
|
|
|
* No more entries to fill, cancel the message and
|
2018-01-28 11:17:25 +02:00
|
|
|
* return 0 to mark end of dumpit.
|
|
|
|
*/
|
2018-03-01 13:57:29 -08:00
|
|
|
if (!filled)
|
2018-01-28 11:17:25 +02:00
|
|
|
goto err;
|
|
|
|
|
RDMA/core: Sync unregistration with netlink commands
When the rdma device is getting removed, get resource info can race with
device removal, as below:
CPU-0 CPU-1
-------- --------
rdma_nl_rcv_msg()
nldev_res_get_cq_dumpit()
mutex_lock(device_lock);
get device reference
mutex_unlock(device_lock); [..]
ib_unregister_device()
/* Valid reference to
* device->dev exists.
*/
ib_dealloc_device()
[..]
provider->fill_res_entry();
Even though device object is not freed, fill_res_entry() can get called on
device which doesn't have a driver anymore. Kernel core device reference
count is not sufficient, as this only keeps the structure valid, and
doesn't guarantee the driver is still loaded.
Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series. If a
class is net namespace aware, then the below [1] call trace is observed in
above situation.
Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.
[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-11-16 03:50:57 +02:00
|
|
|
ib_device_put(device);
|
2018-01-28 11:17:25 +02:00
|
|
|
return skb->len;
|
|
|
|
|
|
|
|
res_err:
|
|
|
|
nla_nest_cancel(skb, table_attr);
|
|
|
|
|
|
|
|
err:
|
|
|
|
nlmsg_cancel(skb, nlh);
|
|
|
|
|
|
|
|
err_index:
|
RDMA/core: Sync unregistration with netlink commands
When the rdma device is getting removed, get resource info can race with
device removal, as below:
CPU-0 CPU-1
-------- --------
rdma_nl_rcv_msg()
nldev_res_get_cq_dumpit()
mutex_lock(device_lock);
get device reference
mutex_unlock(device_lock); [..]
ib_unregister_device()
/* Valid reference to
* device->dev exists.
*/
ib_dealloc_device()
[..]
provider->fill_res_entry();
Even though device object is not freed, fill_res_entry() can get called on
device which doesn't have a driver anymore. Kernel core device reference
count is not sufficient, as this only keeps the structure valid, and
doesn't guarantee the driver is still loaded.
Similar race can occur with device renaming and device removal, where
device_rename() tries to rename a unregistered device. While this is fine
for devices of a class which are not net namespace aware, but it is
incorrect for net namespace aware class coming in subsequent series. If a
class is net namespace aware, then the below [1] call trace is observed in
above situation.
Therefore, to avoid the race, keep a reference count and let device
unregistration wait until all netlink users drop the reference.
[1] Call trace:
kernfs: ns required in 'infiniband' for 'mlx5_0'
WARNING: CPU: 18 PID: 44270 at fs/kernfs/dir.c:842 kernfs_find_ns+0x104/0x120
libahci i2c_core mlxfw libata dca [last unloaded: devlink]
RIP: 0010:kernfs_find_ns+0x104/0x120
Call Trace:
kernfs_find_and_get_ns+0x2e/0x50
sysfs_rename_link_ns+0x40/0xb0
device_rename+0xb2/0xf0
ib_device_rename+0xb3/0x100 [ib_core]
nldev_set_doit+0x165/0x190 [ib_core]
rdma_nl_rcv_msg+0x249/0x250 [ib_core]
? netlink_deliver_tap+0x8f/0x3e0
rdma_nl_rcv+0xd6/0x120 [ib_core]
netlink_unicast+0x17c/0x230
netlink_sendmsg+0x2f0/0x3e0
sock_sendmsg+0x30/0x40
__sys_sendto+0xdc/0x160
Fixes: da5c85078215 ("RDMA/nldev: add driver-specific resource tracking")
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
2018-11-16 03:50:57 +02:00
|
|
|
ib_device_put(device);
|
2018-01-28 11:17:25 +02:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
RDMA: Add support to dump resource tracker in RAW format
Add support to get resource dump in raw format. It enables drivers to
return the entire device-specific QP/CQ/MR context without requiring the
driver to set each field separately.
The raw query returns only the device specific data, general data is still
returned by using the existing queries.
Example:
$ rdma res show mr dev mlx5_1 mrn 2 -r -j
[{"ifindex":7,"ifname":"mlx5_1",
"data":[0,4,255,254,0,0,0,0,0,0,0,0,16,28,0,216,...]}]
Link: https://lore.kernel.org/r/20200623113043.1228482-9-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
2020-06-23 14:30:40 +03:00
|
|
|
/*
 * Generate the netlink handlers for one resource flavour:
 *
 *   nldev_res_get_<name>_dumpit() -> res_get_common_dumpit()
 *   nldev_res_get_<name>_doit()   -> res_get_common_doit()
 *
 * Both forward @type as the restrack type and fill_res_<name>_entry as the
 * fill function, so a function of that name must be visible at the point
 * of expansion.
 */
#define RES_GET_FUNCS(name, type)					       \
	static int nldev_res_get_##name##_dumpit(struct sk_buff *skb,	       \
						 struct netlink_callback *cb)  \
	{								       \
		return res_get_common_dumpit(skb, cb, type,		       \
					     fill_res_##name##_entry);	       \
	}								       \
	static int nldev_res_get_##name##_doit(struct sk_buff *skb,	       \
					       struct nlmsghdr *nlh,	       \
					       struct netlink_ext_ack *extack) \
	{								       \
		return res_get_common_doit(skb, nlh, extack, type,	       \
					   fill_res_##name##_entry);	       \
	}
|
2018-03-01 13:58:13 -08:00
|
|
|
|
2019-01-30 12:48:53 +02:00
|
|
|
/*
 * Handler instantiations. The *_raw flavours share the restrack type of
 * their plain counterpart and differ only in the fill function selected
 * through the name.
 */
RES_GET_FUNCS(qp, RDMA_RESTRACK_QP);
RES_GET_FUNCS(qp_raw, RDMA_RESTRACK_QP);
RES_GET_FUNCS(cm_id, RDMA_RESTRACK_CM_ID);
RES_GET_FUNCS(cq, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(cq_raw, RDMA_RESTRACK_CQ);
RES_GET_FUNCS(pd, RDMA_RESTRACK_PD);
RES_GET_FUNCS(mr, RDMA_RESTRACK_MR);
RES_GET_FUNCS(mr_raw, RDMA_RESTRACK_MR);
RES_GET_FUNCS(counter, RDMA_RESTRACK_COUNTER);
RES_GET_FUNCS(ctx, RDMA_RESTRACK_CTX);
RES_GET_FUNCS(srq, RDMA_RESTRACK_SRQ);
RES_GET_FUNCS(srq_raw, RDMA_RESTRACK_SRQ);
|
2018-03-01 13:58:28 -08:00
|
|
|
|
2019-02-15 11:03:53 -08:00
|
|
|
/* Registered rdma_link_ops, linked through their ->list member. */
static LIST_HEAD(link_ops);
/* Taken for write by rdma_link_register()/rdma_link_unregister(). */
static DECLARE_RWSEM(link_ops_rwsem);
|
|
|
|
|
|
|
|
static const struct rdma_link_ops *link_ops_get(const char *type)
|
|
|
|
{
|
|
|
|
const struct rdma_link_ops *ops;
|
|
|
|
|
|
|
|
list_for_each_entry(ops, &link_ops, list) {
|
|
|
|
if (!strcmp(ops->type, type))
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
ops = NULL;
|
|
|
|
out:
|
|
|
|
return ops;
|
|
|
|
}
|
|
|
|
|
|
|
|
void rdma_link_register(struct rdma_link_ops *ops)
|
|
|
|
{
|
|
|
|
down_write(&link_ops_rwsem);
|
2019-02-22 09:29:02 +03:00
|
|
|
if (WARN_ON_ONCE(link_ops_get(ops->type)))
|
2019-02-15 11:03:53 -08:00
|
|
|
goto out;
|
|
|
|
list_add(&ops->list, &link_ops);
|
|
|
|
out:
|
|
|
|
up_write(&link_ops_rwsem);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(rdma_link_register);
|
|
|
|
|
|
|
|
void rdma_link_unregister(struct rdma_link_ops *ops)
|
|
|
|
{
|
|
|
|
down_write(&link_ops_rwsem);
|
|
|
|
list_del(&ops->list);
|
|
|
|
up_write(&link_ops_rwsem);
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL(rdma_link_unregister);
|
|
|
|
|
|
|
|
static int nldev_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
char ibdev_name[IB_DEVICE_NAME_MAX];
|
|
|
|
const struct rdma_link_ops *ops;
|
|
|
|
char ndev_name[IFNAMSIZ];
|
|
|
|
struct net_device *ndev;
|
|
|
|
char type[IFNAMSIZ];
|
|
|
|
int err;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, extack);
|
2019-02-15 11:03:53 -08:00
|
|
|
if (err || !tb[RDMA_NLDEV_ATTR_DEV_NAME] ||
|
|
|
|
!tb[RDMA_NLDEV_ATTR_LINK_TYPE] || !tb[RDMA_NLDEV_ATTR_NDEV_NAME])
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-11-15 18:08:06 +01:00
|
|
|
nla_strscpy(ibdev_name, tb[RDMA_NLDEV_ATTR_DEV_NAME],
|
2019-02-15 11:03:53 -08:00
|
|
|
sizeof(ibdev_name));
|
2020-03-09 16:05:15 -03:00
|
|
|
if (strchr(ibdev_name, '%') || strlen(ibdev_name) == 0)
|
2019-02-15 11:03:53 -08:00
|
|
|
return -EINVAL;
|
|
|
|
|
2020-11-15 18:08:06 +01:00
|
|
|
nla_strscpy(type, tb[RDMA_NLDEV_ATTR_LINK_TYPE], sizeof(type));
|
|
|
|
nla_strscpy(ndev_name, tb[RDMA_NLDEV_ATTR_NDEV_NAME],
|
2019-02-15 11:03:53 -08:00
|
|
|
sizeof(ndev_name));
|
|
|
|
|
2019-07-04 16:04:01 +03:00
|
|
|
ndev = dev_get_by_name(sock_net(skb->sk), ndev_name);
|
2019-02-15 11:03:53 -08:00
|
|
|
if (!ndev)
|
|
|
|
return -ENODEV;
|
|
|
|
|
|
|
|
down_read(&link_ops_rwsem);
|
|
|
|
ops = link_ops_get(type);
|
|
|
|
#ifdef CONFIG_MODULES
|
|
|
|
if (!ops) {
|
|
|
|
up_read(&link_ops_rwsem);
|
|
|
|
request_module("rdma-link-%s", type);
|
|
|
|
down_read(&link_ops_rwsem);
|
|
|
|
ops = link_ops_get(type);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
err = ops ? ops->newlink(ibdev_name, ndev) : -EINVAL;
|
|
|
|
up_read(&link_ops_rwsem);
|
|
|
|
dev_put(ndev);
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nldev_dellink(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
struct ib_device *device;
|
|
|
|
u32 index;
|
|
|
|
int err;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, extack);
|
2019-02-15 11:03:53 -08:00
|
|
|
if (err || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
2019-02-26 14:01:47 +02:00
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
2019-02-15 11:03:53 -08:00
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2022-04-04 12:26:42 -03:00
|
|
|
if (!(device->attrs.kernel_cap_flags & IBK_ALLOW_USER_UNREG)) {
|
2019-02-15 11:03:53 -08:00
|
|
|
ib_device_put(device);
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
ib_unregister_device_and_put(device);
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2019-06-13 21:38:18 -03:00
|
|
|
static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
2019-06-21 17:00:44 -04:00
|
|
|
char client_name[RDMA_NLDEV_ATTR_CHARDEV_TYPE_SIZE];
|
2019-06-13 21:38:18 -03:00
|
|
|
struct ib_client_nl_info data = {};
|
|
|
|
struct ib_device *ibdev = NULL;
|
|
|
|
struct sk_buff *msg;
|
|
|
|
u32 index;
|
|
|
|
int err;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
|
|
|
|
NL_VALIDATE_LIBERAL, extack);
|
2019-06-13 21:38:18 -03:00
|
|
|
if (err || !tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE])
|
|
|
|
return -EINVAL;
|
|
|
|
|
2020-11-15 18:08:06 +01:00
|
|
|
nla_strscpy(client_name, tb[RDMA_NLDEV_ATTR_CHARDEV_TYPE],
|
2019-06-21 17:00:44 -04:00
|
|
|
sizeof(client_name));
|
2019-06-13 21:38:18 -03:00
|
|
|
|
|
|
|
if (tb[RDMA_NLDEV_ATTR_DEV_INDEX]) {
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
|
|
|
ibdev = ib_device_get_by_index(sock_net(skb->sk), index);
|
|
|
|
if (!ibdev)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
|
|
|
|
data.port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
|
|
|
|
if (!rdma_is_port_valid(ibdev, data.port)) {
|
|
|
|
err = -EINVAL;
|
|
|
|
goto out_put;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
data.port = -1;
|
|
|
|
}
|
|
|
|
} else if (tb[RDMA_NLDEV_ATTR_PORT_INDEX]) {
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
|
|
|
if (!msg) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto out_put;
|
|
|
|
}
|
|
|
|
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
|
|
|
|
RDMA_NLDEV_CMD_GET_CHARDEV),
|
|
|
|
0, 0);
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh) {
|
|
|
|
err = -EMSGSIZE;
|
|
|
|
goto out_nlmsg;
|
|
|
|
}
|
2019-06-13 21:38:18 -03:00
|
|
|
|
|
|
|
data.nl_msg = msg;
|
|
|
|
err = ib_get_client_nl_info(ibdev, client_name, &data);
|
|
|
|
if (err)
|
|
|
|
goto out_nlmsg;
|
|
|
|
|
|
|
|
err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV,
|
|
|
|
huge_encode_dev(data.cdev->devt),
|
|
|
|
RDMA_NLDEV_ATTR_PAD);
|
|
|
|
if (err)
|
|
|
|
goto out_data;
|
|
|
|
err = nla_put_u64_64bit(msg, RDMA_NLDEV_ATTR_CHARDEV_ABI, data.abi,
|
|
|
|
RDMA_NLDEV_ATTR_PAD);
|
|
|
|
if (err)
|
|
|
|
goto out_data;
|
|
|
|
if (nla_put_string(msg, RDMA_NLDEV_ATTR_CHARDEV_NAME,
|
|
|
|
dev_name(data.cdev))) {
|
|
|
|
err = -EMSGSIZE;
|
|
|
|
goto out_data;
|
|
|
|
}
|
|
|
|
|
|
|
|
nlmsg_end(msg, nlh);
|
|
|
|
put_device(data.cdev);
|
|
|
|
if (ibdev)
|
|
|
|
ib_device_put(ibdev);
|
2019-07-23 10:02:05 +03:00
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
2019-06-13 21:38:18 -03:00
|
|
|
|
|
|
|
out_data:
|
|
|
|
put_device(data.cdev);
|
|
|
|
out_nlmsg:
|
|
|
|
nlmsg_free(msg);
|
|
|
|
out_put:
|
|
|
|
if (ibdev)
|
|
|
|
ib_device_put(ibdev);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-05-13 08:26:57 +03:00
|
|
|
static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
2019-02-26 14:01:48 +02:00
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
2019-05-13 08:26:57 +03:00
|
|
|
struct sk_buff *msg;
|
2019-02-26 14:01:48 +02:00
|
|
|
int err;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
err = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, NL_VALIDATE_LIBERAL, extack);
|
2019-02-26 14:01:48 +02:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2019-05-13 08:26:57 +03:00
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
|
|
|
if (!msg)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
2019-02-26 14:01:48 +02:00
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
|
|
|
|
RDMA_NLDEV_CMD_SYS_GET),
|
|
|
|
0, 0);
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh) {
|
|
|
|
nlmsg_free(msg);
|
|
|
|
return -EMSGSIZE;
|
|
|
|
}
|
2019-02-26 14:01:48 +02:00
|
|
|
|
2019-05-13 08:26:57 +03:00
|
|
|
err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
|
2019-02-26 14:01:48 +02:00
|
|
|
(u8)ib_devices_shared_netns);
|
|
|
|
if (err) {
|
2019-05-13 08:26:57 +03:00
|
|
|
nlmsg_free(msg);
|
2019-02-26 14:01:48 +02:00
|
|
|
return err;
|
|
|
|
}
|
2021-04-18 15:10:25 +03:00
|
|
|
|
2023-10-09 13:43:58 +03:00
|
|
|
err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE,
|
|
|
|
(u8)privileged_qkey);
|
|
|
|
if (err) {
|
|
|
|
nlmsg_free(msg);
|
|
|
|
return err;
|
|
|
|
}
|
2024-09-09 20:30:25 +03:00
|
|
|
|
|
|
|
err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_MONITOR_MODE, 1);
|
|
|
|
if (err) {
|
|
|
|
nlmsg_free(msg);
|
|
|
|
return err;
|
|
|
|
}
|
2021-04-18 15:10:25 +03:00
|
|
|
/*
|
|
|
|
* Copy-on-fork is supported.
|
|
|
|
* See commits:
|
|
|
|
* 70e806e4e645 ("mm: Do early cow for pinned pages during fork() for ptes")
|
|
|
|
* 4eae4efa2c29 ("hugetlb: do early cow when page pinned on src mm")
|
|
|
|
* for more details. Don't backport this without them.
|
|
|
|
*
|
|
|
|
* Return value ignored on purpose, assume copy-on-fork is not
|
|
|
|
* supported in case of failure.
|
|
|
|
*/
|
|
|
|
nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_COPY_ON_FORK, 1);
|
|
|
|
|
2019-05-13 08:26:57 +03:00
|
|
|
nlmsg_end(msg, nlh);
|
2019-07-23 10:02:05 +03:00
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
2019-02-26 14:01:48 +02:00
|
|
|
}
|
|
|
|
|
2023-10-09 13:43:58 +03:00
|
|
|
static int nldev_set_sys_set_netns_doit(struct nlattr *tb[])
|
2019-02-26 14:01:49 +02:00
|
|
|
{
|
|
|
|
u8 enable;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE]);
|
|
|
|
/* Only 0 and 1 are supported */
|
|
|
|
if (enable > 1)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
err = rdma_compatdev_set(enable);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2023-10-09 13:43:58 +03:00
|
|
|
static int nldev_set_sys_set_pqkey_doit(struct nlattr *tb[])
|
|
|
|
{
|
|
|
|
u8 enable;
|
|
|
|
|
|
|
|
enable = nla_get_u8(tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE]);
|
|
|
|
/* Only 0 and 1 are supported */
|
|
|
|
if (enable > 1)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
privileged_qkey = enable;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nldev_set_sys_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
int err;
|
|
|
|
|
|
|
|
err = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, extack);
|
|
|
|
if (err)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (tb[RDMA_NLDEV_SYS_ATTR_NETNS_MODE])
|
|
|
|
return nldev_set_sys_set_netns_doit(tb);
|
|
|
|
|
|
|
|
if (tb[RDMA_NLDEV_SYS_ATTR_PRIVILEGED_QKEY_MODE])
|
|
|
|
return nldev_set_sys_set_pqkey_doit(tb);
|
|
|
|
|
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-10-08 15:24:34 +03:00
|
|
|
/*
 * Handle the STAT_MODE part of a stat-set request.
 *
 * @msg is the reply under construction; @tb holds the parsed request, in
 * which the caller has verified that STAT_MODE is present.
 *
 * In auto mode the optional mask and op-counter flag are handed to
 * rdma_counter_set_auto_mode().  Any other mode value is treated as a manual
 * bind: the QP given by RES_LQPN is bound either to the counter named by
 * STAT_COUNTER_ID or to a newly allocated one, and the resulting counter id
 * and QP number are appended to @msg.  If that append fails, the bind is
 * undone before returning -EMSGSIZE.
 */
static int nldev_stat_set_mode_doit(struct sk_buff *msg,
				    struct netlink_ext_ack *extack,
				    struct nlattr *tb[],
				    struct ib_device *device, u32 port)
{
	bool bind_opcnt = false;
	u32 counter_id = 0;
	u32 auto_mask = 0;
	u32 qp_num;
	int rc;

	/* Currently only counter for QP is supported */
	if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
		return -EINVAL;
	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
		return -EINVAL;

	if (tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED])
		bind_opcnt = !!nla_get_u8(
			tb[RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED]);

	if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_MODE]) ==
	    RDMA_COUNTER_MODE_AUTO) {
		if (tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK])
			auto_mask = nla_get_u32(
				tb[RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK]);
		return rdma_counter_set_auto_mode(device, port, auto_mask,
						  bind_opcnt, extack);
	}

	/* Manual binding needs to know which QP to bind. */
	if (!tb[RDMA_NLDEV_ATTR_RES_LQPN])
		return -EINVAL;
	qp_num = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);

	if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]) {
		counter_id = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
		rc = rdma_counter_bind_qpn(device, port, qp_num, counter_id);
	} else {
		rc = rdma_counter_bind_qpn_alloc(device, port, qp_num,
						 &counter_id);
	}
	if (rc)
		return rc;

	if (nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, counter_id) ||
	    nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp_num)) {
		/* The reply cannot describe the bind, so roll it back. */
		rdma_counter_unbind_qpn(device, port, qp_num, counter_id);
		return -EMSGSIZE;
	}

	return 0;
}
|
|
|
|
|
2021-10-08 15:24:35 +03:00
|
|
|
static int nldev_stat_set_counter_dynamic_doit(struct nlattr *tb[],
|
|
|
|
struct ib_device *device,
|
|
|
|
u32 port)
|
|
|
|
{
|
|
|
|
struct rdma_hw_stats *stats;
|
|
|
|
struct nlattr *entry_attr;
|
|
|
|
unsigned long *target;
|
2022-03-16 11:39:48 +03:00
|
|
|
int rem, i, ret = 0;
|
|
|
|
u32 index;
|
2021-10-08 15:24:35 +03:00
|
|
|
|
|
|
|
stats = ib_get_hw_stats_port(device, port);
|
|
|
|
if (!stats)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
target = kcalloc(BITS_TO_LONGS(stats->num_counters),
|
|
|
|
sizeof(*stats->is_disabled), GFP_KERNEL);
|
|
|
|
if (!target)
|
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
nla_for_each_nested(entry_attr, tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS],
|
|
|
|
rem) {
|
|
|
|
index = nla_get_u32(entry_attr);
|
|
|
|
if ((index >= stats->num_counters) ||
|
|
|
|
!(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
set_bit(index, target);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < stats->num_counters; i++) {
|
|
|
|
if (!(stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
ret = rdma_counter_modify(device, port, i, test_bit(i, target));
|
|
|
|
if (ret)
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
kfree(target);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-07-02 13:02:39 +03:00
|
|
|
static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
struct ib_device *device;
|
|
|
|
struct sk_buff *msg;
|
2021-10-08 15:24:34 +03:00
|
|
|
u32 index, port;
|
2019-07-02 13:02:39 +03:00
|
|
|
int ret;
|
|
|
|
|
2021-10-08 15:24:34 +03:00
|
|
|
ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1, nldev_policy,
|
|
|
|
extack);
|
|
|
|
if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
|
|
|
|
!tb[RDMA_NLDEV_ATTR_PORT_INDEX])
|
2019-07-02 13:02:39 +03:00
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
|
|
|
|
if (!rdma_is_port_valid(device, port)) {
|
|
|
|
ret = -EINVAL;
|
2021-10-08 15:24:34 +03:00
|
|
|
goto err_put_device;
|
|
|
|
}
|
|
|
|
|
2021-10-08 15:24:35 +03:00
|
|
|
if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] &&
|
|
|
|
!tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
|
2021-10-08 15:24:34 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto err_put_device;
|
2019-07-02 13:02:39 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
|
|
|
if (!msg) {
|
|
|
|
ret = -ENOMEM;
|
2021-10-08 15:24:34 +03:00
|
|
|
goto err_put_device;
|
2019-07-02 13:02:39 +03:00
|
|
|
}
|
|
|
|
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
|
|
|
|
RDMA_NLDEV_CMD_STAT_SET),
|
|
|
|
0, 0);
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh || fill_nldev_handle(msg, device) ||
|
2021-10-08 15:24:34 +03:00
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
|
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err_free_msg;
|
2019-07-02 13:02:39 +03:00
|
|
|
}
|
|
|
|
|
2021-10-08 15:24:35 +03:00
|
|
|
if (tb[RDMA_NLDEV_ATTR_STAT_MODE]) {
|
|
|
|
ret = nldev_stat_set_mode_doit(msg, extack, tb, device, port);
|
|
|
|
if (ret)
|
|
|
|
goto err_free_msg;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (tb[RDMA_NLDEV_ATTR_STAT_HWCOUNTERS]) {
|
|
|
|
ret = nldev_stat_set_counter_dynamic_doit(tb, device, port);
|
|
|
|
if (ret)
|
|
|
|
goto err_free_msg;
|
|
|
|
}
|
2021-10-08 15:24:34 +03:00
|
|
|
|
2019-07-02 13:02:44 +03:00
|
|
|
nlmsg_end(msg, nlh);
|
|
|
|
ib_device_put(device);
|
2019-07-23 10:02:05 +03:00
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
2019-07-02 13:02:44 +03:00
|
|
|
|
2021-10-08 15:24:34 +03:00
|
|
|
err_free_msg:
|
2019-07-02 13:02:44 +03:00
|
|
|
nlmsg_free(msg);
|
2021-10-08 15:24:34 +03:00
|
|
|
err_put_device:
|
2019-07-02 13:02:44 +03:00
|
|
|
ib_device_put(device);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
struct ib_device *device;
|
|
|
|
struct sk_buff *msg;
|
|
|
|
u32 index, port, qpn, cntn;
|
|
|
|
int ret;
|
2019-07-02 13:02:39 +03:00
|
|
|
|
2019-07-02 13:02:44 +03:00
|
|
|
ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, extack);
|
|
|
|
if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES] ||
|
|
|
|
!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX] ||
|
|
|
|
!tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID] ||
|
|
|
|
!tb[RDMA_NLDEV_ATTR_RES_LQPN])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
if (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES]) != RDMA_NLDEV_ATTR_RES_QP)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
|
|
|
|
if (!rdma_is_port_valid(device, port)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
|
|
|
if (!msg) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
|
|
|
|
RDMA_NLDEV_CMD_STAT_SET),
|
|
|
|
0, 0);
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh) {
|
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err_fill;
|
|
|
|
}
|
2019-07-02 13:02:44 +03:00
|
|
|
|
|
|
|
cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
|
|
|
|
qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
|
|
|
|
if (fill_nldev_handle(msg, device) ||
|
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
|
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_COUNTER_ID, cntn) ||
|
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qpn)) {
|
2019-07-02 13:02:39 +03:00
|
|
|
ret = -EMSGSIZE;
|
2019-07-02 13:02:44 +03:00
|
|
|
goto err_fill;
|
2019-07-02 13:02:39 +03:00
|
|
|
}
|
|
|
|
|
2019-10-02 14:56:27 +03:00
|
|
|
ret = rdma_counter_unbind_qpn(device, port, qpn, cntn);
|
|
|
|
if (ret)
|
|
|
|
goto err_fill;
|
|
|
|
|
2019-07-02 13:02:39 +03:00
|
|
|
nlmsg_end(msg, nlh);
|
|
|
|
ib_device_put(device);
|
2019-07-23 10:02:05 +03:00
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
2019-07-02 13:02:39 +03:00
|
|
|
|
2019-07-02 13:02:44 +03:00
|
|
|
err_fill:
|
2019-07-02 13:02:39 +03:00
|
|
|
nlmsg_free(msg);
|
|
|
|
err:
|
|
|
|
ib_device_put(device);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2025-06-20 13:33:26 +02:00
|
|
|
static noinline_for_stack int
|
|
|
|
stat_get_doit_default_counter(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack,
|
|
|
|
struct nlattr *tb[])
|
2019-07-02 13:02:46 +03:00
|
|
|
{
|
|
|
|
struct rdma_hw_stats *stats;
|
|
|
|
struct nlattr *table_attr;
|
|
|
|
struct ib_device *device;
|
|
|
|
int ret, num_cnts, i;
|
|
|
|
struct sk_buff *msg;
|
|
|
|
u32 index, port;
|
|
|
|
u64 v;
|
|
|
|
|
|
|
|
if (!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
2021-06-11 19:00:20 +03:00
|
|
|
if (!device->ops.alloc_hw_port_stats || !device->ops.get_hw_stats) {
|
2019-07-02 13:02:46 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
|
2021-06-11 19:00:21 +03:00
|
|
|
stats = ib_get_hw_stats_port(device, port);
|
|
|
|
if (!stats) {
|
2019-07-02 13:02:46 +03:00
|
|
|
ret = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
|
|
|
if (!msg) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
|
|
|
|
RDMA_NLDEV_CMD_STAT_GET),
|
|
|
|
0, 0);
|
|
|
|
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh || fill_nldev_handle(msg, device) ||
|
2019-07-02 13:02:46 +03:00
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
|
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err_msg;
|
|
|
|
}
|
|
|
|
|
|
|
|
mutex_lock(&stats->lock);
|
|
|
|
|
|
|
|
num_cnts = device->ops.get_hw_stats(device, stats, port, 0);
|
|
|
|
if (num_cnts < 0) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err_stats;
|
|
|
|
}
|
|
|
|
|
|
|
|
table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
|
|
|
|
if (!table_attr) {
|
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err_stats;
|
|
|
|
}
|
|
|
|
for (i = 0; i < num_cnts; i++) {
|
2021-10-08 15:24:31 +03:00
|
|
|
if (test_bit(i, stats->is_disabled))
|
|
|
|
continue;
|
|
|
|
|
2019-07-02 13:02:46 +03:00
|
|
|
v = stats->value[i] +
|
|
|
|
rdma_counter_get_hwstat_value(device, port, i);
|
2021-10-08 15:24:29 +03:00
|
|
|
if (rdma_nl_stat_hwcounter_entry(msg,
|
|
|
|
stats->descs[i].name, v)) {
|
2019-07-02 13:02:46 +03:00
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err_table;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
nla_nest_end(msg, table_attr);
|
|
|
|
|
|
|
|
mutex_unlock(&stats->lock);
|
|
|
|
nlmsg_end(msg, nlh);
|
|
|
|
ib_device_put(device);
|
2019-07-23 10:02:05 +03:00
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
2019-07-02 13:02:46 +03:00
|
|
|
|
|
|
|
err_table:
|
|
|
|
nla_nest_cancel(msg, table_attr);
|
|
|
|
err_stats:
|
|
|
|
mutex_unlock(&stats->lock);
|
|
|
|
err_msg:
|
|
|
|
nlmsg_free(msg);
|
|
|
|
err:
|
|
|
|
ib_device_put(device);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2025-06-20 13:33:26 +02:00
|
|
|
static noinline_for_stack int
|
|
|
|
stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack, struct nlattr *tb[])
|
2019-07-02 13:02:45 +03:00
|
|
|
|
|
|
|
{
|
|
|
|
static enum rdma_nl_counter_mode mode;
|
|
|
|
static enum rdma_nl_counter_mask mask;
|
|
|
|
struct ib_device *device;
|
|
|
|
struct sk_buff *msg;
|
|
|
|
u32 index, port;
|
2025-03-13 16:18:43 +02:00
|
|
|
bool opcnt;
|
2019-07-02 13:02:45 +03:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
if (tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID])
|
|
|
|
return nldev_res_get_counter_doit(skb, nlh, extack);
|
|
|
|
|
|
|
|
if (!tb[RDMA_NLDEV_ATTR_STAT_MODE] ||
|
|
|
|
!tb[RDMA_NLDEV_ATTR_DEV_INDEX] || !tb[RDMA_NLDEV_ATTR_PORT_INDEX])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
index = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), index);
|
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
|
|
|
|
if (!rdma_is_port_valid(device, port)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
|
|
|
if (!msg) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
|
|
|
|
RDMA_NLDEV_CMD_STAT_GET),
|
|
|
|
0, 0);
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh) {
|
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err_msg;
|
|
|
|
}
|
2019-07-02 13:02:45 +03:00
|
|
|
|
2025-03-13 16:18:43 +02:00
|
|
|
ret = rdma_counter_get_mode(device, port, &mode, &mask, &opcnt);
|
2019-07-02 13:02:45 +03:00
|
|
|
if (ret)
|
|
|
|
goto err_msg;
|
|
|
|
|
|
|
|
if (fill_nldev_handle(msg, device) ||
|
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port) ||
|
2019-08-09 13:13:19 +03:00
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_MODE, mode)) {
|
|
|
|
ret = -EMSGSIZE;
|
2019-07-02 13:02:45 +03:00
|
|
|
goto err_msg;
|
2019-08-09 13:13:19 +03:00
|
|
|
}
|
2019-07-02 13:02:45 +03:00
|
|
|
|
|
|
|
if ((mode == RDMA_COUNTER_MODE_AUTO) &&
|
2019-08-09 13:13:19 +03:00
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_AUTO_MODE_MASK, mask)) {
|
|
|
|
ret = -EMSGSIZE;
|
2019-07-02 13:02:45 +03:00
|
|
|
goto err_msg;
|
2019-08-09 13:13:19 +03:00
|
|
|
}
|
2019-07-02 13:02:45 +03:00
|
|
|
|
2025-03-13 16:18:43 +02:00
|
|
|
if ((mode == RDMA_COUNTER_MODE_AUTO) &&
|
|
|
|
nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_OPCOUNTER_ENABLED, opcnt)) {
|
|
|
|
ret = -EMSGSIZE;
|
|
|
|
goto err_msg;
|
|
|
|
}
|
|
|
|
|
2019-07-02 13:02:45 +03:00
|
|
|
nlmsg_end(msg, nlh);
|
|
|
|
ib_device_put(device);
|
2019-07-23 10:02:05 +03:00
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
2019-07-02 13:02:45 +03:00
|
|
|
|
|
|
|
err_msg:
|
|
|
|
nlmsg_free(msg);
|
|
|
|
err:
|
|
|
|
ib_device_put(device);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-07-02 13:02:40 +03:00
|
|
|
static int nldev_stat_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
int ret;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, NL_VALIDATE_LIBERAL, extack);
|
2019-07-02 13:02:46 +03:00
|
|
|
if (ret)
|
2019-07-02 13:02:40 +03:00
|
|
|
return -EINVAL;
|
|
|
|
|
2019-07-02 13:02:46 +03:00
|
|
|
if (!tb[RDMA_NLDEV_ATTR_STAT_RES])
|
|
|
|
return stat_get_doit_default_counter(skb, nlh, extack, tb);
|
|
|
|
|
2019-07-02 13:02:40 +03:00
|
|
|
switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
|
|
|
|
case RDMA_NLDEV_ATTR_RES_QP:
|
2019-07-02 13:02:45 +03:00
|
|
|
ret = stat_get_doit_qp(skb, nlh, extack, tb);
|
2019-07-02 13:02:40 +03:00
|
|
|
break;
|
2019-10-16 09:23:08 +03:00
|
|
|
case RDMA_NLDEV_ATTR_RES_MR:
|
|
|
|
ret = res_get_common_doit(skb, nlh, extack, RDMA_RESTRACK_MR,
|
|
|
|
fill_stat_mr_entry);
|
|
|
|
break;
|
2019-07-02 13:02:40 +03:00
|
|
|
default:
|
|
|
|
ret = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nldev_stat_get_dumpit(struct sk_buff *skb,
|
|
|
|
struct netlink_callback *cb)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
int ret;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
ret = __nlmsg_parse(cb->nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, NL_VALIDATE_LIBERAL, NULL);
|
2019-07-02 13:02:40 +03:00
|
|
|
if (ret || !tb[RDMA_NLDEV_ATTR_STAT_RES])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
switch (nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_RES])) {
|
|
|
|
case RDMA_NLDEV_ATTR_RES_QP:
|
|
|
|
ret = nldev_res_get_counter_dumpit(skb, cb);
|
|
|
|
break;
|
2019-10-16 09:23:08 +03:00
|
|
|
case RDMA_NLDEV_ATTR_RES_MR:
|
|
|
|
ret = res_get_common_dumpit(skb, cb, RDMA_RESTRACK_MR,
|
|
|
|
fill_stat_mr_entry);
|
|
|
|
break;
|
2019-07-02 13:02:40 +03:00
|
|
|
default:
|
|
|
|
ret = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2021-10-08 15:24:33 +03:00
|
|
|
static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
|
|
|
|
struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX], *table, *entry;
|
|
|
|
struct rdma_hw_stats *stats;
|
|
|
|
struct ib_device *device;
|
|
|
|
struct sk_buff *msg;
|
|
|
|
u32 devid, port;
|
|
|
|
int ret, i;
|
|
|
|
|
RDMA/nldev: Enhance netlink message parsing and validation
Use strict parsing validation for set commands, and liberal
validation for get commands. Additionally, remove all usage of
nlmsg_parse_depricate().
Strict parsing validation fails when encountering unrecognized
attributes in the Netlink message, while liberal parsing
validation ignores them.
In 57d7a8fd904c ("rdma: Add an option to display driver-specific QPs in the rdma tool")
in iproute2, the attribute RDMA_NLDEV_ATTR_DRIVER_DETAILS
was added. This cause backwards compatibility issues when using
the rdma tool with the new attribute and an older kernel which does
recognize this attribute.
In this case, the command "rdma stat show mr" would fail, because the
new rdma tool would fill the netlink message with the new attribute and
the older kernel would fail as it used strict parsing and did not
recognize the new attribute.
In general, strict validation is appropriate for set commands as they
modify the system, while liberal validation is suitable for get
commands which only query system information.
Replace all uses of nlmsg_parse_deprecated() with __nlmsg_parse(),
using the NL_VALIDATE_LIBERAL flag.
The nlmsg_parse_deprecated() function internally calls
__nlmsg_parse() with the NL_VALIDATE_LIBERAL flag, but its name
is confusing.
Signed-off-by: Chiara Meiohas <cmeiohas@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/f633a979a49db090d05c24a3ba83d30727bb777b.1722331020.git.leon@kernel.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
2024-07-30 12:17:25 +03:00
|
|
|
ret = __nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, NL_VALIDATE_LIBERAL, extack);
|
2021-10-08 15:24:33 +03:00
|
|
|
if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
|
|
|
|
!tb[RDMA_NLDEV_ATTR_PORT_INDEX])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), devid);
|
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
port = nla_get_u32(tb[RDMA_NLDEV_ATTR_PORT_INDEX]);
|
|
|
|
if (!rdma_is_port_valid(device, port)) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
stats = ib_get_hw_stats_port(device, port);
|
|
|
|
if (!stats) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
|
|
|
|
if (!msg) {
|
|
|
|
ret = -ENOMEM;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
nlh = nlmsg_put(
|
|
|
|
msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
|
|
|
|
RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_STAT_GET_STATUS),
|
|
|
|
0, 0);
|
|
|
|
|
|
|
|
ret = -EMSGSIZE;
|
2022-11-28 13:52:45 +02:00
|
|
|
if (!nlh || fill_nldev_handle(msg, device) ||
|
2021-10-08 15:24:33 +03:00
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
|
|
|
|
goto err_msg;
|
|
|
|
|
|
|
|
table = nla_nest_start(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTERS);
|
|
|
|
if (!table)
|
|
|
|
goto err_msg;
|
|
|
|
|
|
|
|
mutex_lock(&stats->lock);
|
|
|
|
for (i = 0; i < stats->num_counters; i++) {
|
|
|
|
entry = nla_nest_start(msg,
|
|
|
|
RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY);
|
|
|
|
if (!entry)
|
|
|
|
goto err_msg_table;
|
|
|
|
|
|
|
|
if (nla_put_string(msg,
|
|
|
|
RDMA_NLDEV_ATTR_STAT_HWCOUNTER_ENTRY_NAME,
|
|
|
|
stats->descs[i].name) ||
|
|
|
|
nla_put_u32(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_INDEX, i))
|
|
|
|
goto err_msg_entry;
|
|
|
|
|
|
|
|
if ((stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) &&
|
|
|
|
(nla_put_u8(msg, RDMA_NLDEV_ATTR_STAT_HWCOUNTER_DYNAMIC,
|
|
|
|
!test_bit(i, stats->is_disabled))))
|
|
|
|
goto err_msg_entry;
|
|
|
|
|
|
|
|
nla_nest_end(msg, entry);
|
|
|
|
}
|
|
|
|
mutex_unlock(&stats->lock);
|
|
|
|
|
|
|
|
nla_nest_end(msg, table);
|
|
|
|
nlmsg_end(msg, nlh);
|
|
|
|
ib_device_put(device);
|
|
|
|
return rdma_nl_unicast(sock_net(skb->sk), msg, NETLINK_CB(skb).portid);
|
|
|
|
|
|
|
|
err_msg_entry:
|
|
|
|
nla_nest_cancel(msg, entry);
|
|
|
|
err_msg_table:
|
|
|
|
mutex_unlock(&stats->lock);
|
|
|
|
nla_nest_cancel(msg, table);
|
|
|
|
err_msg:
|
|
|
|
nlmsg_free(msg);
|
|
|
|
err:
|
|
|
|
ib_device_put(device);
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2024-06-16 19:08:40 +03:00
|
|
|
static int nldev_newdev(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
enum rdma_nl_dev_type type;
|
|
|
|
struct ib_device *parent;
|
|
|
|
char name[IFNAMSIZ] = {};
|
|
|
|
u32 parentid;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, extack);
|
|
|
|
if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX] ||
|
|
|
|
!tb[RDMA_NLDEV_ATTR_DEV_NAME] || !tb[RDMA_NLDEV_ATTR_DEV_TYPE])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
nla_strscpy(name, tb[RDMA_NLDEV_ATTR_DEV_NAME], sizeof(name));
|
|
|
|
type = nla_get_u8(tb[RDMA_NLDEV_ATTR_DEV_TYPE]);
|
|
|
|
parentid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
|
|
|
parent = ib_device_get_by_index(sock_net(skb->sk), parentid);
|
|
|
|
if (!parent)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
ret = ib_add_sub_device(parent, type, name);
|
|
|
|
ib_device_put(parent);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int nldev_deldev(struct sk_buff *skb, struct nlmsghdr *nlh,
|
|
|
|
struct netlink_ext_ack *extack)
|
|
|
|
{
|
|
|
|
struct nlattr *tb[RDMA_NLDEV_ATTR_MAX];
|
|
|
|
struct ib_device *device;
|
|
|
|
u32 devid;
|
|
|
|
int ret;
|
|
|
|
|
|
|
|
ret = nlmsg_parse(nlh, 0, tb, RDMA_NLDEV_ATTR_MAX - 1,
|
|
|
|
nldev_policy, extack);
|
|
|
|
if (ret || !tb[RDMA_NLDEV_ATTR_DEV_INDEX])
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
devid = nla_get_u32(tb[RDMA_NLDEV_ATTR_DEV_INDEX]);
|
|
|
|
device = ib_device_get_by_index(sock_net(skb->sk), devid);
|
|
|
|
if (!device)
|
|
|
|
return -EINVAL;
|
|
|
|
|
|
|
|
return ib_del_sub_device_and_put(device);
|
|
|
|
}
|
|
|
|
|
2017-12-05 22:30:04 +02:00
|
|
|
/*
 * Handler table for the RDMA_NL_NLDEV netlink client, indexed by
 * RDMA_NLDEV_CMD_* and registered in nldev_init().
 *
 * .doit handles a single request, .dump handles a dump request.
 * NOTE(review): .flags = RDMA_NL_ADMIN_PERM presumably restricts the
 * command to privileged callers - confirm against the rdma netlink core.
 */
static const struct rdma_nl_cbs nldev_cb_table[RDMA_NLDEV_NUM_OPS] = {
	/* Device, link and port commands */
	[RDMA_NLDEV_CMD_GET] = {
		.doit = nldev_get_doit,
		.dump = nldev_get_dumpit,
	},
	[RDMA_NLDEV_CMD_GET_CHARDEV] = {
		.doit = nldev_get_chardev,
	},
	[RDMA_NLDEV_CMD_SET] = {
		.doit = nldev_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_NEWLINK] = {
		.doit = nldev_newlink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELLINK] = {
		.doit = nldev_dellink,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_PORT_GET] = {
		.doit = nldev_port_get_doit,
		.dump = nldev_port_get_dumpit,
	},
	/* Resource queries */
	[RDMA_NLDEV_CMD_RES_GET] = {
		.doit = nldev_res_get_doit,
		.dump = nldev_res_get_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_QP_GET] = {
		.doit = nldev_res_get_qp_doit,
		.dump = nldev_res_get_qp_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CM_ID_GET] = {
		.doit = nldev_res_get_cm_id_doit,
		.dump = nldev_res_get_cm_id_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET] = {
		.doit = nldev_res_get_cq_doit,
		.dump = nldev_res_get_cq_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET] = {
		.doit = nldev_res_get_mr_doit,
		.dump = nldev_res_get_mr_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_PD_GET] = {
		.doit = nldev_res_get_pd_doit,
		.dump = nldev_res_get_pd_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_CTX_GET] = {
		.doit = nldev_res_get_ctx_doit,
		.dump = nldev_res_get_ctx_dumpit,
	},
	[RDMA_NLDEV_CMD_RES_SRQ_GET] = {
		.doit = nldev_res_get_srq_doit,
		.dump = nldev_res_get_srq_dumpit,
	},
	/* System-wide settings */
	[RDMA_NLDEV_CMD_SYS_GET] = {
		.doit = nldev_sys_get_doit,
	},
	[RDMA_NLDEV_CMD_SYS_SET] = {
		.doit = nldev_set_sys_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	/* Statistics / counters */
	[RDMA_NLDEV_CMD_STAT_SET] = {
		.doit = nldev_stat_set_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_GET] = {
		.doit = nldev_stat_get_doit,
		.dump = nldev_stat_get_dumpit,
	},
	[RDMA_NLDEV_CMD_STAT_DEL] = {
		.doit = nldev_stat_del_doit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	/* Raw (device specific) resource dumps */
	[RDMA_NLDEV_CMD_RES_QP_GET_RAW] = {
		.doit = nldev_res_get_qp_raw_doit,
		.dump = nldev_res_get_qp_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_CQ_GET_RAW] = {
		.doit = nldev_res_get_cq_raw_doit,
		.dump = nldev_res_get_cq_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_MR_GET_RAW] = {
		.doit = nldev_res_get_mr_raw_doit,
		.dump = nldev_res_get_mr_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_RES_SRQ_GET_RAW] = {
		.doit = nldev_res_get_srq_raw_doit,
		.dump = nldev_res_get_srq_raw_dumpit,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_STAT_GET_STATUS] = {
		.doit = nldev_stat_get_counter_status_doit,
	},
	/* Sub device management */
	[RDMA_NLDEV_CMD_NEWDEV] = {
		.doit = nldev_newdev,
		.flags = RDMA_NL_ADMIN_PERM,
	},
	[RDMA_NLDEV_CMD_DELDEV] = {
		.doit = nldev_deldev,
		.flags = RDMA_NL_ADMIN_PERM,
	},
};
|
|
|
|
|
2024-10-31 11:31:14 +02:00
|
|
|
/*
 * Add the netdev index and name of @device's @port to a monitor message.
 *
 * Nothing is added (and 0 is returned) when the port has no netdev or the
 * netdev does not belong to @net.  Otherwise returns the result of the
 * nla_put_*() calls.
 */
static int fill_mon_netdev_rename(struct sk_buff *msg,
				  struct ib_device *device, u32 port,
				  const struct net *net)
{
	struct net_device *ndev = ib_device_get_netdev(device, port);
	int err = 0;

	if (ndev && net_eq(dev_net(ndev), net)) {
		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX,
				  ndev->ifindex);
		if (!err)
			err = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME,
					     ndev->name);
	}

	/* Reached with ndev == NULL as well, exactly as before. */
	dev_put(ndev);
	return err;
}
|
|
|
|
|
2024-09-09 20:30:24 +03:00
|
|
|
/*
 * Fill a netdev attach/detach monitor message for @port of @device.
 *
 * The device index, device name and port index are always added; the
 * netdev index and name are added only when the port currently has a
 * netdev.  If the port has a netdev that lives outside @net, nothing is
 * added and 0 is returned.
 *
 * Returns 0 on success or the first nla_put_*() error.
 */
static int fill_mon_netdev_association(struct sk_buff *msg,
				       struct ib_device *device, u32 port,
				       const struct net *net)
{
	struct net_device *ndev = ib_device_get_netdev(device, port);
	int err = 0;

	if (ndev && !net_eq(dev_net(ndev), net))
		goto put_ndev;

	err = nla_put_u32(msg, RDMA_NLDEV_ATTR_DEV_INDEX, device->index);
	if (!err)
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_DEV_NAME,
				     dev_name(&device->dev));
	if (!err)
		err = nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port);
	if (err || !ndev)
		goto put_ndev;

	err = nla_put_u32(msg, RDMA_NLDEV_ATTR_NDEV_INDEX, ndev->ifindex);
	if (!err)
		err = nla_put_string(msg, RDMA_NLDEV_ATTR_NDEV_NAME,
				     ndev->name);

put_ndev:
	dev_put(ndev);
	return err;
}
|
|
|
|
|
|
|
|
/*
 * Emit a rate-limited warning that an RDMA monitor event of @type could
 * not be sent for @device.
 *
 * For the netdev attach and netdev rename events the ifindex of the
 * netdev currently bound to @port_num is included in the message.
 * ib_device_get_netdev() may return NULL (the fill_mon_netdev_*() helpers
 * check for it), so the ifindex is reported as 0 instead of dereferencing
 * a NULL netdev when the port has none.
 *
 * Unknown event types are silently ignored.
 */
static void rdma_nl_notify_err_msg(struct ib_device *device, u32 port_num,
				   enum rdma_nl_notify_event_type type)
{
	struct net_device *netdev;

	switch (type) {
	case RDMA_REGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor register device event\n");
		break;
	case RDMA_UNREGISTER_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor unregister device event\n");
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
		netdev = ib_device_get_netdev(device, port_num);
		/* The port may have no netdev; never dereference NULL here. */
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev attach event: port %d netdev %d\n",
				     port_num, netdev ? netdev->ifindex : 0);
		dev_put(netdev);
		break;
	case RDMA_NETDEV_DETACH_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev detach event: port %d\n",
				     port_num);
		break;
	case RDMA_RENAME_EVENT:
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor rename device event\n");
		break;

	case RDMA_NETDEV_RENAME_EVENT:
		netdev = ib_device_get_netdev(device, port_num);
		/* The port may have no netdev; never dereference NULL here. */
		dev_warn_ratelimited(&device->dev,
				     "Failed to send RDMA monitor netdev rename event: port %d netdev %d\n",
				     port_num, netdev ? netdev->ifindex : 0);
		dev_put(netdev);
		break;
	default:
		break;
	}
}
|
|
|
|
|
|
|
|
/*
 * Build an RDMA_NLDEV_CMD_MONITOR message describing event @type on
 * @device / @port_num and multicast it to the RDMA_NL_GROUP_NOTIFY group
 * in the device's net namespace.
 *
 * The payload depends on the event type: the device handle for
 * register/unregister/rename, the netdev association for netdev
 * attach/detach, and the netdev index/name for netdev rename.  The event
 * type itself is always appended as RDMA_NLDEV_ATTR_EVENT_TYPE.
 *
 * Returns 0 on success (a multicast result of -ESRCH is treated as
 * success), -EINVAL when the device has no net namespace, -ENOMEM when
 * the message cannot be allocated, -EMSGSIZE when the header does not
 * fit, or the error of the failing fill/put/multicast step.  Every
 * failure after the allocation also logs a warning.
 */
int rdma_nl_notify_event(struct ib_device *device, u32 port_num,
			 enum rdma_nl_notify_event_type type)
{
	struct nlmsghdr *hdr;
	struct sk_buff *msg;
	struct net *net;
	int err;

	net = read_pnet(&device->coredev.rdma_net);
	if (!net)
		return -EINVAL;

	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
	if (!msg)
		return -ENOMEM;

	hdr = nlmsg_put(msg, 0, 0,
			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_MONITOR),
			0, 0);
	if (!hdr) {
		err = -EMSGSIZE;
		goto err_free;
	}

	switch (type) {
	case RDMA_REGISTER_EVENT:
	case RDMA_UNREGISTER_EVENT:
	case RDMA_RENAME_EVENT:
		err = fill_nldev_handle(msg, device);
		break;
	case RDMA_NETDEV_ATTACH_EVENT:
	case RDMA_NETDEV_DETACH_EVENT:
		err = fill_mon_netdev_association(msg, device, port_num, net);
		break;
	case RDMA_NETDEV_RENAME_EVENT:
		err = fill_mon_netdev_rename(msg, device, port_num, net);
		break;
	default:
		/* Unknown types carry only the event type attribute. */
		err = 0;
		break;
	}
	if (err)
		goto err_free;

	err = nla_put_u8(msg, RDMA_NLDEV_ATTR_EVENT_TYPE, type);
	if (err)
		goto err_free;

	nlmsg_end(msg, hdr);

	err = rdma_nl_multicast(net, msg, RDMA_NL_GROUP_NOTIFY, GFP_KERNEL);
	if (!err || err == -ESRCH)
		return 0;

	msg = NULL; /* skb is freed in the netlink send-op handling */

err_free:
	rdma_nl_notify_err_msg(device, port_num, type);
	nlmsg_free(msg);
	return err;
}
|
|
|
|
|
2017-06-20 09:14:15 +03:00
|
|
|
/* Register the nldev handler table as the RDMA_NL_NLDEV netlink client. */
void __init nldev_init(void)
{
	rdma_nl_register(RDMA_NL_NLDEV, nldev_cb_table);
}
|
|
|
|
|
2022-10-25 10:41:46 +08:00
|
|
|
/* Unregister the RDMA_NL_NLDEV netlink client registered by nldev_init(). */
void nldev_exit(void)
{
	rdma_nl_unregister(RDMA_NL_NLDEV);
}
|
2017-08-14 14:57:39 -06:00
|
|
|
|
|
|
|
/*
 * Module alias for the RDMA_NL_NLDEV netlink client.
 * NOTE(review): the literal 5 presumably mirrors the numeric value of
 * RDMA_NL_NLDEV - confirm against the uapi header.
 */
MODULE_ALIAS_RDMA_NETLINK(RDMA_NL_NLDEV, 5);
|