linux/drivers/cxl/core/mbox.c

1553 lines
42 KiB
C
Raw Permalink Normal View History

// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/security.h>
#include <linux/debugfs.h>
#include <linux/ktime.h>
#include <linux/mutex.h>
#include <linux/unaligned.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
#include "trace.h"
2025-02-26 09:21:21 -07:00
#include "mce.h"
static bool cxl_raw_allow_all;
/**
* DOC: cxl mbox
*
* Core implementation of the CXL 2.0 Type-3 Memory Device Mailbox. The
* implementation is used by the cxl_pci driver to initialize the device
* and implement the cxl_mem.h IOCTL UAPI. It also implements the
* backend of the cxl_pmem_ctl() transport for LIBNVDIMM.
*/
#define cxl_for_each_cmd(cmd) \
for ((cmd) = &cxl_mem_commands[0]; \
((cmd) - cxl_mem_commands) < ARRAY_SIZE(cxl_mem_commands); (cmd)++)
#define CXL_CMD(_id, sin, sout, _flags) \
[CXL_MEM_COMMAND_ID_##_id] = { \
.info = { \
.id = CXL_MEM_COMMAND_ID_##_id, \
.size_in = sin, \
.size_out = sout, \
}, \
.opcode = CXL_MBOX_OP_##_id, \
.flags = _flags, \
}
#define CXL_VARIABLE_PAYLOAD ~0U
/*
* This table defines the supported mailbox commands for the driver. This table
* is made up of a UAPI structure. Non-negative values as parameters in the
* table will be validated against the user's input. For example, if size_in is
* 0, and the user passed in 1, it is an error.
*/
static struct cxl_mem_command cxl_mem_commands[CXL_MEM_COMMAND_ID_MAX] = {
CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
CXL_CMD(RAW, CXL_VARIABLE_PAYLOAD, CXL_VARIABLE_PAYLOAD, 0),
#endif
CXL_CMD(GET_SUPPORTED_LOGS, 0, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE),
CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
CXL_CMD(GET_LSA, 0x8, CXL_VARIABLE_PAYLOAD, 0),
CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
CXL_CMD(GET_LOG, 0x18, CXL_VARIABLE_PAYLOAD, CXL_CMD_FLAG_FORCE_ENABLE),
CXL_CMD(GET_LOG_CAPS, 0x10, 0x4, 0),
CXL_CMD(CLEAR_LOG, 0x10, 0, 0),
CXL_CMD(GET_SUP_LOG_SUBLIST, 0x2, CXL_VARIABLE_PAYLOAD, 0),
CXL_CMD(SET_PARTITION_INFO, 0x0a, 0, 0),
CXL_CMD(SET_LSA, CXL_VARIABLE_PAYLOAD, 0, 0),
CXL_CMD(GET_ALERT_CONFIG, 0, 0x10, 0),
CXL_CMD(SET_ALERT_CONFIG, 0xc, 0, 0),
CXL_CMD(GET_SHUTDOWN_STATE, 0, 0x1, 0),
CXL_CMD(SET_SHUTDOWN_STATE, 0x1, 0, 0),
CXL_CMD(GET_SCAN_MEDIA_CAPS, 0x10, 0x4, 0),
CXL_CMD(GET_TIMESTAMP, 0, 0x8, 0),
};
/*
* Commands that RAW doesn't permit. The rationale for each:
*
* CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
* coordination of transaction timeout values at the root bridge level.
*
* CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
* and needs to be coordinated with HDM updates.
*
* CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
* driver and any writes from userspace invalidates those contents.
*
* CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
* to the device after it is marked clean, userspace can not make that
* assertion.
*
* CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
* is kept up to date with patrol notifications and error management.
*
* CXL_MBOX_OP_[GET_,INJECT_,CLEAR_]POISON: These commands require kernel
* driver orchestration for safety.
*/
static u16 cxl_disabled_raw_commands[] = {
CXL_MBOX_OP_ACTIVATE_FW,
CXL_MBOX_OP_SET_PARTITION_INFO,
CXL_MBOX_OP_SET_LSA,
CXL_MBOX_OP_SET_SHUTDOWN_STATE,
CXL_MBOX_OP_SCAN_MEDIA,
CXL_MBOX_OP_GET_SCAN_MEDIA,
CXL_MBOX_OP_GET_POISON,
CXL_MBOX_OP_INJECT_POISON,
CXL_MBOX_OP_CLEAR_POISON,
};
/*
* Command sets that RAW doesn't permit. All opcodes in this set are
* disabled because they pass plain text security payloads over the
* user/kernel boundary. This functionality is intended to be wrapped
* behind the keys ABI which allows for encrypted payloads in the UAPI
*/
static u8 security_command_sets[] = {
0x44, /* Sanitize */
0x45, /* Persistent Memory Data-at-rest Security */
0x46, /* Security Passthrough */
};
static bool cxl_is_security_command(u16 opcode)
{
int i;
for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
if (security_command_sets[i] == (opcode >> 8))
return true;
return false;
}
static void cxl_set_security_cmd_enabled(struct cxl_security_state *security,
u16 opcode)
{
switch (opcode) {
case CXL_MBOX_OP_SANITIZE:
set_bit(CXL_SEC_ENABLED_SANITIZE, security->enabled_cmds);
break;
case CXL_MBOX_OP_SECURE_ERASE:
set_bit(CXL_SEC_ENABLED_SECURE_ERASE,
security->enabled_cmds);
break;
case CXL_MBOX_OP_GET_SECURITY_STATE:
set_bit(CXL_SEC_ENABLED_GET_SECURITY_STATE,
security->enabled_cmds);
break;
case CXL_MBOX_OP_SET_PASSPHRASE:
set_bit(CXL_SEC_ENABLED_SET_PASSPHRASE,
security->enabled_cmds);
break;
case CXL_MBOX_OP_DISABLE_PASSPHRASE:
set_bit(CXL_SEC_ENABLED_DISABLE_PASSPHRASE,
security->enabled_cmds);
break;
case CXL_MBOX_OP_UNLOCK:
set_bit(CXL_SEC_ENABLED_UNLOCK, security->enabled_cmds);
break;
case CXL_MBOX_OP_FREEZE_SECURITY:
set_bit(CXL_SEC_ENABLED_FREEZE_SECURITY,
security->enabled_cmds);
break;
case CXL_MBOX_OP_PASSPHRASE_SECURE_ERASE:
set_bit(CXL_SEC_ENABLED_PASSPHRASE_SECURE_ERASE,
security->enabled_cmds);
break;
default:
break;
}
}
static bool cxl_is_poison_command(u16 opcode)
{
#define CXL_MBOX_OP_POISON_CMDS 0x43
if ((opcode >> 8) == CXL_MBOX_OP_POISON_CMDS)
return true;
return false;
}
static void cxl_set_poison_cmd_enabled(struct cxl_poison_state *poison,
u16 opcode)
{
switch (opcode) {
case CXL_MBOX_OP_GET_POISON:
set_bit(CXL_POISON_ENABLED_LIST, poison->enabled_cmds);
break;
case CXL_MBOX_OP_INJECT_POISON:
set_bit(CXL_POISON_ENABLED_INJECT, poison->enabled_cmds);
break;
case CXL_MBOX_OP_CLEAR_POISON:
set_bit(CXL_POISON_ENABLED_CLEAR, poison->enabled_cmds);
break;
case CXL_MBOX_OP_GET_SCAN_MEDIA_CAPS:
set_bit(CXL_POISON_ENABLED_SCAN_CAPS, poison->enabled_cmds);
break;
case CXL_MBOX_OP_SCAN_MEDIA:
set_bit(CXL_POISON_ENABLED_SCAN_MEDIA, poison->enabled_cmds);
break;
case CXL_MBOX_OP_GET_SCAN_MEDIA:
set_bit(CXL_POISON_ENABLED_SCAN_RESULTS, poison->enabled_cmds);
break;
default:
break;
}
}
static struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
{
struct cxl_mem_command *c;
cxl_for_each_cmd(c)
if (c->opcode == opcode)
return c;
return NULL;
}
static const char *cxl_mem_opcode_to_name(u16 opcode)
{
struct cxl_mem_command *c;
c = cxl_mem_find_command(opcode);
if (!c)
return NULL;
return cxl_command_names[c->info.id].name;
}
/**
* cxl_internal_send_cmd() - Kernel internal interface to send a mailbox command
* @cxl_mbox: CXL mailbox context
* @mbox_cmd: initialized command to execute
*
* Context: Any context.
* Return:
* * %>=0 - Number of bytes returned in @out.
* * %-E2BIG - Payload is too large for hardware.
* * %-EBUSY - Couldn't acquire exclusive mailbox access.
* * %-EFAULT - Hardware error occurred.
* * %-ENXIO - Command completed, but device reported an error.
* * %-EIO - Unexpected output size.
*
* Mailbox commands may execute successfully yet the device itself reported an
* error. While this distinction can be useful for commands from userspace, the
* kernel will only be able to use results when both are successful.
*/
int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *mbox_cmd)
{
size_t out_size, min_out;
int rc;
if (mbox_cmd->size_in > cxl_mbox->payload_size ||
mbox_cmd->size_out > cxl_mbox->payload_size)
return -E2BIG;
out_size = mbox_cmd->size_out;
min_out = mbox_cmd->min_out;
rc = cxl_mbox->mbox_send(cxl_mbox, mbox_cmd);
cxl/mbox: Fix Payload Length check for Get Log command Commit 2aeaf663b85e introduced strict checking for variable length payload size validation. The payload length of received data must match the size of the requested data by the caller except for the case where the min_out value is set. The Get Log command does not have a header with a length field set. The Log size is determined by the Get Supported Logs command (CXL 3.0, 8.2.9.5.1). However, the actual size can be smaller and the number of valid bytes in the payload output must be determined reading the Payload Length field (CXL 3.0, Table 8-36, Note 2). Two issues arise: The command can successfully complete with a payload length of zero. And, the valid payload length must then also be consumed by the caller. Change cxl_xfer_log() to pass the number of payload bytes back to the caller to determine the number of log entries. Implement the payload handling as a special case where mbox_cmd->size_out is consulted when cxl_internal_send_cmd() returns -EIO. A WARN_ONCE() is added to check that -EIO is only returned in case of an unexpected output size. Logs can be bigger than the maximum payload length and multiple Get Log commands can be issued. If the received payload size is smaller than the maximum payload size we can assume all valid bytes have been fetched. Stop sending further Get Log commands then. On that occasion, change debug messages to also report the opcodes of supported commands. The variable payload commands GET_LSA and SET_LSA are not affected by this strict check: SET_LSA cannot be broken because SET_LSA does not return an output payload, and GET_LSA never expects short reads. Fixes: 2aeaf663b85e ("cxl/mbox: Add variable output size validation for internal commands") Signed-off-by: Robert Richter <rrichter@amd.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/20230119094934.86067-1-rrichter@amd.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-19 10:49:34 +01:00
/*
* EIO is reserved for a payload size mismatch and mbox_send()
* may not return this error.
*/
if (WARN_ONCE(rc == -EIO, "Bad return code: -EIO"))
return -ENXIO;
if (rc)
return rc;
if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS &&
mbox_cmd->return_code != CXL_MBOX_CMD_RC_BACKGROUND)
return cxl_mbox_cmd_rc2errno(mbox_cmd);
if (!out_size)
return 0;
/*
* Variable sized output needs to at least satisfy the caller's
* minimum if not the fully requested size.
*/
if (min_out == 0)
min_out = out_size;
if (mbox_cmd->size_out < min_out)
return -EIO;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_internal_send_cmd, "CXL");
static bool cxl_mem_raw_command_allowed(u16 opcode)
{
int i;
if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
return false;
if (security_locked_down(LOCKDOWN_PCI_ACCESS))
return false;
if (cxl_raw_allow_all)
return true;
if (cxl_is_security_command(opcode))
return false;
for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
if (cxl_disabled_raw_commands[i] == opcode)
return false;
return true;
}
/**
* cxl_payload_from_user_allowed() - Check contents of in_payload.
* @opcode: The mailbox command opcode.
* @payload_in: Pointer to the input payload passed in from user space.
*
* Return:
* * true - payload_in passes check for @opcode.
* * false - payload_in contains invalid or unsupported values.
*
* The driver may inspect payload contents before sending a mailbox
* command from user space to the device. The intent is to reject
* commands with input payloads that are known to be unsafe. This
* check is not intended to replace the users careful selection of
* mailbox command parameters and makes no guarantee that the user
* command will succeed, nor that it is appropriate.
*
* The specific checks are determined by the opcode.
*/
static bool cxl_payload_from_user_allowed(u16 opcode, void *payload_in)
{
switch (opcode) {
case CXL_MBOX_OP_SET_PARTITION_INFO: {
struct cxl_mbox_set_partition_info *pi = payload_in;
if (pi->flags & CXL_SET_PARTITION_IMMEDIATE_FLAG)
return false;
break;
}
case CXL_MBOX_OP_CLEAR_LOG: {
const uuid_t *uuid = (uuid_t *)payload_in;
/*
* Restrict the Clear log action to only apply to
* Vendor debug logs.
*/
return uuid_equal(uuid, &DEFINE_CXL_VENDOR_DEBUG_UUID);
}
default:
break;
}
return true;
}
static int cxl_mbox_cmd_ctor(struct cxl_mbox_cmd *mbox_cmd,
struct cxl_mailbox *cxl_mbox, u16 opcode,
size_t in_size, size_t out_size, u64 in_payload)
{
*mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = opcode,
.size_in = in_size,
};
if (in_size) {
mbox_cmd->payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
in_size);
if (IS_ERR(mbox_cmd->payload_in))
return PTR_ERR(mbox_cmd->payload_in);
if (!cxl_payload_from_user_allowed(opcode, mbox_cmd->payload_in)) {
dev_dbg(cxl_mbox->host, "%s: input payload not allowed\n",
cxl_mem_opcode_to_name(opcode));
kvfree(mbox_cmd->payload_in);
return -EBUSY;
}
}
/* Prepare to handle a full payload for variable sized output */
if (out_size == CXL_VARIABLE_PAYLOAD)
mbox_cmd->size_out = cxl_mbox->payload_size;
else
mbox_cmd->size_out = out_size;
if (mbox_cmd->size_out) {
mbox_cmd->payload_out = kvzalloc(mbox_cmd->size_out, GFP_KERNEL);
if (!mbox_cmd->payload_out) {
kvfree(mbox_cmd->payload_in);
return -ENOMEM;
}
}
return 0;
}
static void cxl_mbox_cmd_dtor(struct cxl_mbox_cmd *mbox)
{
kvfree(mbox->payload_in);
kvfree(mbox->payload_out);
}
static int cxl_to_mem_cmd_raw(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
struct cxl_mailbox *cxl_mbox)
{
if (send_cmd->raw.rsvd)
return -EINVAL;
/*
* Unlike supported commands, the output size of RAW commands
* gets passed along without further checking, so it must be
* validated here.
*/
if (send_cmd->out.size > cxl_mbox->payload_size)
return -EINVAL;
if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
return -EPERM;
dev_WARN_ONCE(cxl_mbox->host, true, "raw command path used\n");
*mem_cmd = (struct cxl_mem_command) {
.info = {
.id = CXL_MEM_COMMAND_ID_RAW,
.size_in = send_cmd->in.size,
.size_out = send_cmd->out.size,
},
.opcode = send_cmd->raw.opcode
};
return 0;
}
static int cxl_to_mem_cmd(struct cxl_mem_command *mem_cmd,
const struct cxl_send_command *send_cmd,
struct cxl_mailbox *cxl_mbox)
{
struct cxl_mem_command *c = &cxl_mem_commands[send_cmd->id];
const struct cxl_command_info *info = &c->info;
if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
return -EINVAL;
if (send_cmd->rsvd)
return -EINVAL;
if (send_cmd->in.rsvd || send_cmd->out.rsvd)
return -EINVAL;
/* Check that the command is enabled for hardware */
if (!test_bit(info->id, cxl_mbox->enabled_cmds))
return -ENOTTY;
/* Check that the command is not claimed for exclusive kernel use */
if (test_bit(info->id, cxl_mbox->exclusive_cmds))
return -EBUSY;
/* Check the input buffer is the expected size */
if ((info->size_in != CXL_VARIABLE_PAYLOAD) &&
(info->size_in != send_cmd->in.size))
return -ENOMEM;
/* Check the output buffer is at least large enough */
if ((info->size_out != CXL_VARIABLE_PAYLOAD) &&
(send_cmd->out.size < info->size_out))
return -ENOMEM;
*mem_cmd = (struct cxl_mem_command) {
.info = {
.id = info->id,
.flags = info->flags,
.size_in = send_cmd->in.size,
.size_out = send_cmd->out.size,
},
.opcode = c->opcode
};
return 0;
}
/**
* cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
* @mbox_cmd: Sanitized and populated &struct cxl_mbox_cmd.
* @cxl_mbox: CXL mailbox context
* @send_cmd: &struct cxl_send_command copied in from userspace.
*
* Return:
* * %0 - @out_cmd is ready to send.
* * %-ENOTTY - Invalid command specified.
* * %-EINVAL - Reserved fields or invalid values were used.
* * %-ENOMEM - Input or output buffer wasn't sized properly.
* * %-EPERM - Attempted to use a protected command.
* * %-EBUSY - Kernel has claimed exclusive access to this opcode
*
* The result of this command is a fully validated command in @mbox_cmd that is
* safe to send to the hardware.
*/
static int cxl_validate_cmd_from_user(struct cxl_mbox_cmd *mbox_cmd,
struct cxl_mailbox *cxl_mbox,
const struct cxl_send_command *send_cmd)
{
struct cxl_mem_command mem_cmd;
int rc;
if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
return -ENOTTY;
/*
* The user can never specify an input payload larger than what hardware
* supports, but output can be arbitrarily large (simply write out as
* much data as the hardware provides).
*/
if (send_cmd->in.size > cxl_mbox->payload_size)
return -EINVAL;
/* Sanitize and construct a cxl_mem_command */
if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW)
rc = cxl_to_mem_cmd_raw(&mem_cmd, send_cmd, cxl_mbox);
else
rc = cxl_to_mem_cmd(&mem_cmd, send_cmd, cxl_mbox);
if (rc)
return rc;
/* Sanitize and construct a cxl_mbox_cmd */
return cxl_mbox_cmd_ctor(mbox_cmd, cxl_mbox, mem_cmd.opcode,
mem_cmd.info.size_in, mem_cmd.info.size_out,
send_cmd->in.payload);
}
int cxl_query_cmd(struct cxl_mailbox *cxl_mbox,
struct cxl_mem_query_commands __user *q)
{
struct device *dev = cxl_mbox->host;
struct cxl_mem_command *cmd;
u32 n_commands;
int j = 0;
dev_dbg(dev, "Query IOCTL\n");
if (get_user(n_commands, &q->n_commands))
return -EFAULT;
/* returns the total number if 0 elements are requested. */
if (n_commands == 0)
return put_user(ARRAY_SIZE(cxl_mem_commands), &q->n_commands);
/*
* otherwise, return min(n_commands, total commands) cxl_command_info
* structures.
*/
cxl_for_each_cmd(cmd) {
struct cxl_command_info info = cmd->info;
if (test_bit(info.id, cxl_mbox->enabled_cmds))
info.flags |= CXL_MEM_COMMAND_FLAG_ENABLED;
if (test_bit(info.id, cxl_mbox->exclusive_cmds))
info.flags |= CXL_MEM_COMMAND_FLAG_EXCLUSIVE;
if (copy_to_user(&q->commands[j++], &info, sizeof(info)))
return -EFAULT;
if (j == n_commands)
break;
}
return 0;
}
/**
* handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
* @cxl_mbox: The mailbox context for the operation.
* @mbox_cmd: The validated mailbox command.
* @out_payload: Pointer to userspace's output payload.
* @size_out: (Input) Max payload size to copy out.
* (Output) Payload size hardware generated.
* @retval: Hardware generated return code from the operation.
*
* Return:
* * %0 - Mailbox transaction succeeded. This implies the mailbox
* protocol completed successfully not that the operation itself
* was successful.
* * %-ENOMEM - Couldn't allocate a bounce buffer.
* * %-EFAULT - Something happened with copy_to/from_user.
* * %-EINTR - Mailbox acquisition interrupted.
* * %-EXXX - Transaction level failures.
*
* Dispatches a mailbox command on behalf of a userspace request.
* The output payload is copied to userspace.
*
* See cxl_send_cmd().
*/
static int handle_mailbox_cmd_from_user(struct cxl_mailbox *cxl_mbox,
struct cxl_mbox_cmd *mbox_cmd,
u64 out_payload, s32 *size_out,
u32 *retval)
{
struct device *dev = cxl_mbox->host;
int rc;
dev_dbg(dev,
"Submitting %s command for user\n"
"\topcode: %x\n"
"\tsize: %zx\n",
cxl_mem_opcode_to_name(mbox_cmd->opcode),
mbox_cmd->opcode, mbox_cmd->size_in);
rc = cxl_mbox->mbox_send(cxl_mbox, mbox_cmd);
if (rc)
goto out;
/*
* @size_out contains the max size that's allowed to be written back out
* to userspace. While the payload may have written more output than
* this it will have to be ignored.
*/
if (mbox_cmd->size_out) {
dev_WARN_ONCE(dev, mbox_cmd->size_out > *size_out,
"Invalid return size\n");
if (copy_to_user(u64_to_user_ptr(out_payload),
mbox_cmd->payload_out, mbox_cmd->size_out)) {
rc = -EFAULT;
goto out;
}
}
*size_out = mbox_cmd->size_out;
*retval = mbox_cmd->return_code;
out:
cxl_mbox_cmd_dtor(mbox_cmd);
return rc;
}
int cxl_send_cmd(struct cxl_mailbox *cxl_mbox, struct cxl_send_command __user *s)
{
struct device *dev = cxl_mbox->host;
struct cxl_send_command send;
struct cxl_mbox_cmd mbox_cmd;
int rc;
dev_dbg(dev, "Send IOCTL\n");
if (copy_from_user(&send, s, sizeof(send)))
return -EFAULT;
rc = cxl_validate_cmd_from_user(&mbox_cmd, cxl_mbox, &send);
if (rc)
return rc;
rc = handle_mailbox_cmd_from_user(cxl_mbox, &mbox_cmd, send.out.payload,
&send.out.size, &send.retval);
if (rc)
return rc;
if (copy_to_user(s, &send, sizeof(send)))
return -EFAULT;
return 0;
}
static int cxl_xfer_log(struct cxl_memdev_state *mds, uuid_t *uuid,
u32 *size, u8 *out)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
cxl/mbox: Fix Payload Length check for Get Log command Commit 2aeaf663b85e introduced strict checking for variable length payload size validation. The payload length of received data must match the size of the requested data by the caller except for the case where the min_out value is set. The Get Log command does not have a header with a length field set. The Log size is determined by the Get Supported Logs command (CXL 3.0, 8.2.9.5.1). However, the actual size can be smaller and the number of valid bytes in the payload output must be determined reading the Payload Length field (CXL 3.0, Table 8-36, Note 2). Two issues arise: The command can successfully complete with a payload length of zero. And, the valid payload length must then also be consumed by the caller. Change cxl_xfer_log() to pass the number of payload bytes back to the caller to determine the number of log entries. Implement the payload handling as a special case where mbox_cmd->size_out is consulted when cxl_internal_send_cmd() returns -EIO. A WARN_ONCE() is added to check that -EIO is only returned in case of an unexpected output size. Logs can be bigger than the maximum payload length and multiple Get Log commands can be issued. If the received payload size is smaller than the maximum payload size we can assume all valid bytes have been fetched. Stop sending further Get Log commands then. On that occasion, change debug messages to also report the opcodes of supported commands. The variable payload commands GET_LSA and SET_LSA are not affected by this strict check: SET_LSA cannot be broken because SET_LSA does not return an output payload, and GET_LSA never expects short reads. Fixes: 2aeaf663b85e ("cxl/mbox: Add variable output size validation for internal commands") Signed-off-by: Robert Richter <rrichter@amd.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/20230119094934.86067-1-rrichter@amd.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-19 10:49:34 +01:00
u32 remaining = *size;
u32 offset = 0;
while (remaining) {
u32 xfer_size = min_t(u32, remaining, cxl_mbox->payload_size);
struct cxl_mbox_cmd mbox_cmd;
struct cxl_mbox_get_log log;
int rc;
log = (struct cxl_mbox_get_log) {
.uuid = *uuid,
.offset = cpu_to_le32(offset),
.length = cpu_to_le32(xfer_size),
};
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_LOG,
.size_in = sizeof(log),
.payload_in = &log,
.size_out = xfer_size,
.payload_out = out,
};
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
cxl/mbox: Fix Payload Length check for Get Log command Commit 2aeaf663b85e introduced strict checking for variable length payload size validation. The payload length of received data must match the size of the requested data by the caller except for the case where the min_out value is set. The Get Log command does not have a header with a length field set. The Log size is determined by the Get Supported Logs command (CXL 3.0, 8.2.9.5.1). However, the actual size can be smaller and the number of valid bytes in the payload output must be determined reading the Payload Length field (CXL 3.0, Table 8-36, Note 2). Two issues arise: The command can successfully complete with a payload length of zero. And, the valid payload length must then also be consumed by the caller. Change cxl_xfer_log() to pass the number of payload bytes back to the caller to determine the number of log entries. Implement the payload handling as a special case where mbox_cmd->size_out is consulted when cxl_internal_send_cmd() returns -EIO. A WARN_ONCE() is added to check that -EIO is only returned in case of an unexpected output size. Logs can be bigger than the maximum payload length and multiple Get Log commands can be issued. If the received payload size is smaller than the maximum payload size we can assume all valid bytes have been fetched. Stop sending further Get Log commands then. On that occasion, change debug messages to also report the opcodes of supported commands. The variable payload commands GET_LSA and SET_LSA are not affected by this strict check: SET_LSA cannot be broken because SET_LSA does not return an output payload, and GET_LSA never expects short reads. Fixes: 2aeaf663b85e ("cxl/mbox: Add variable output size validation for internal commands") Signed-off-by: Robert Richter <rrichter@amd.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/20230119094934.86067-1-rrichter@amd.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-19 10:49:34 +01:00
/*
* The output payload length that indicates the number
* of valid bytes can be smaller than the Log buffer
* size.
*/
if (rc == -EIO && mbox_cmd.size_out < xfer_size) {
offset += mbox_cmd.size_out;
break;
}
if (rc < 0)
return rc;
out += xfer_size;
remaining -= xfer_size;
offset += xfer_size;
}
cxl/mbox: Fix Payload Length check for Get Log command Commit 2aeaf663b85e introduced strict checking for variable length payload size validation. The payload length of received data must match the size of the requested data by the caller except for the case where the min_out value is set. The Get Log command does not have a header with a length field set. The Log size is determined by the Get Supported Logs command (CXL 3.0, 8.2.9.5.1). However, the actual size can be smaller and the number of valid bytes in the payload output must be determined reading the Payload Length field (CXL 3.0, Table 8-36, Note 2). Two issues arise: The command can successfully complete with a payload length of zero. And, the valid payload length must then also be consumed by the caller. Change cxl_xfer_log() to pass the number of payload bytes back to the caller to determine the number of log entries. Implement the payload handling as a special case where mbox_cmd->size_out is consulted when cxl_internal_send_cmd() returns -EIO. A WARN_ONCE() is added to check that -EIO is only returned in case of an unexpected output size. Logs can be bigger than the maximum payload length and multiple Get Log commands can be issued. If the received payload size is smaller than the maximum payload size we can assume all valid bytes have been fetched. Stop sending further Get Log commands then. On that occasion, change debug messages to also report the opcodes of supported commands. The variable payload commands GET_LSA and SET_LSA are not affected by this strict check: SET_LSA cannot be broken because SET_LSA does not return an output payload, and GET_LSA never expects short reads. Fixes: 2aeaf663b85e ("cxl/mbox: Add variable output size validation for internal commands") Signed-off-by: Robert Richter <rrichter@amd.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Link: https://lore.kernel.org/r/20230119094934.86067-1-rrichter@amd.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-19 10:49:34 +01:00
*size = offset;
return 0;
}
static int check_features_opcodes(u16 opcode, int *ro_cmds, int *wr_cmds)
{
switch (opcode) {
case CXL_MBOX_OP_GET_SUPPORTED_FEATURES:
case CXL_MBOX_OP_GET_FEATURE:
(*ro_cmds)++;
return 1;
case CXL_MBOX_OP_SET_FEATURE:
(*wr_cmds)++;
return 1;
default:
return 0;
}
}
/* 'Get Supported Features' and 'Get Feature' */
#define MAX_FEATURES_READ_CMDS 2
static void set_features_cap(struct cxl_mailbox *cxl_mbox,
int ro_cmds, int wr_cmds)
{
/* Setting up Features capability while walking the CEL */
if (ro_cmds == MAX_FEATURES_READ_CMDS) {
if (wr_cmds)
cxl_mbox->feat_cap = CXL_FEATURES_RW;
else
cxl_mbox->feat_cap = CXL_FEATURES_RO;
}
}
/**
* cxl_walk_cel() - Walk through the Command Effects Log.
* @mds: The driver data for the operation
* @size: Length of the Command Effects Log.
* @cel: CEL
*
* Iterate over each entry in the CEL and determine if the driver supports the
* command. If so, the command is enabled for the device and can be used later.
*/
static void cxl_walk_cel(struct cxl_memdev_state *mds, size_t size, u8 *cel)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_cel_entry *cel_entry;
const int cel_entries = size / sizeof(*cel_entry);
struct device *dev = mds->cxlds.dev;
int i, ro_cmds = 0, wr_cmds = 0;
cel_entry = (struct cxl_cel_entry *) cel;
for (i = 0; i < cel_entries; i++) {
u16 opcode = le16_to_cpu(cel_entry[i].opcode);
struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
int enabled = 0;
if (cmd) {
set_bit(cmd->info.id, cxl_mbox->enabled_cmds);
enabled++;
}
enabled += check_features_opcodes(opcode, &ro_cmds,
&wr_cmds);
if (cxl_is_poison_command(opcode)) {
cxl_set_poison_cmd_enabled(&mds->poison, opcode);
enabled++;
}
if (cxl_is_security_command(opcode)) {
cxl_set_security_cmd_enabled(&mds->security, opcode);
enabled++;
}
dev_dbg(dev, "Opcode 0x%04x %s\n", opcode,
enabled ? "enabled" : "unsupported by driver");
}
set_features_cap(cxl_mbox, ro_cmds, wr_cmds);
}
static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_memdev_state *mds)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_supported_logs *ret;
struct cxl_mbox_cmd mbox_cmd;
int rc;
ret = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
if (!ret)
return ERR_PTR(-ENOMEM);
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_SUPPORTED_LOGS,
.size_out = cxl_mbox->payload_size,
.payload_out = ret,
/* At least the record number field must be valid */
.min_out = 2,
};
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0) {
kvfree(ret);
return ERR_PTR(rc);
}
return ret;
}
enum {
CEL_UUID,
VENDOR_DEBUG_UUID,
};
/* See CXL 2.0 Table 170. Get Log Input Payload */
static const uuid_t log_uuid[] = {
[CEL_UUID] = DEFINE_CXL_CEL_UUID,
[VENDOR_DEBUG_UUID] = DEFINE_CXL_VENDOR_DEBUG_UUID,
};
/**
* cxl_enumerate_cmds() - Enumerate commands for a device.
* @mds: The driver data for the operation
*
* Returns 0 if enumerate completed successfully.
*
* CXL devices have optional support for certain commands. This function will
* determine the set of supported commands for the hardware and update the
* enabled_cmds bitmap in the @mds.
*/
int cxl_enumerate_cmds(struct cxl_memdev_state *mds)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_supported_logs *gsl;
struct device *dev = mds->cxlds.dev;
struct cxl_mem_command *cmd;
int i, rc;
gsl = cxl_get_gsl(mds);
if (IS_ERR(gsl))
return PTR_ERR(gsl);
rc = -ENOENT;
for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
u32 size = le32_to_cpu(gsl->entry[i].size);
uuid_t uuid = gsl->entry[i].uuid;
u8 *log;
dev_dbg(dev, "Found LOG type %pU of size %d", &uuid, size);
if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
continue;
log = kvmalloc(size, GFP_KERNEL);
if (!log) {
rc = -ENOMEM;
goto out;
}
rc = cxl_xfer_log(mds, &uuid, &size, log);
if (rc) {
kvfree(log);
goto out;
}
cxl_walk_cel(mds, size, log);
kvfree(log);
/* In case CEL was bogus, enable some default commands. */
cxl_for_each_cmd(cmd)
if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
set_bit(cmd->info.id, cxl_mbox->enabled_cmds);
/* Found the required CEL */
rc = 0;
}
out:
kvfree(gsl);
return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_enumerate_cmds, "CXL");
void cxl_event_trace_record(const struct cxl_memdev *cxlmd,
enum cxl_event_log_type type,
enum cxl_event_type event_type,
const uuid_t *uuid, union cxl_event *evt)
{
if (event_type == CXL_CPER_EVENT_MEM_MODULE) {
trace_cxl_memory_module(cxlmd, type, &evt->mem_module);
return;
}
if (event_type == CXL_CPER_EVENT_GENERIC) {
trace_cxl_generic_event(cxlmd, type, uuid, &evt->generic);
return;
}
cxl/events: Trace Memory Sparing Event Record CXL rev 3.2 section 8.2.10.2.1.4 Table 8-60 defines the Memory Sparing Event Record. Determine if the event read is memory sparing record and if so trace the record. Memory device shall produce a memory sparing event record 1. After completion of a PPR maintenance operation if the memory sparing event record enable bit is set (Field: sPPR/hPPR Operation Mode in Table 8-128/Table 8-131). 2. In response to a query request by the host (see section 8.2.10.7.1.4) to determine the availability of sparing resources. The device shall report the resource availability by producing the Memory Sparing Event Record (see Table 8-60) in which the channel, rank, nibble mask, bank group, bank, row, column, sub-channel fields are a copy of the values specified in the request. If the controller does not support reporting whether a resource is available, and a perform maintenance operation for memory sparing is issued with query resources set to 1, the controller shall return invalid input. Example trace log for produce memory sparing event record on completion of a soft PPR operation, cxl_memory_sparing: memdev=mem1 host=0000:0f:00.0 serial=3 log=Informational : time=55045163029 uuid=e71f3a40-2d29-4092-8a39-4d1c966c7c65 len=128 flags='0x1' handle=1 related_handle=0 maint_op_class=2 maint_op_sub_class=1 ld_id=0 head_id=0 : flags='' result=0 validity_flags='CHANNEL|RANK|NIBBLE|BANK GROUP|BANK|ROW|COLUMN' spare resource avail=1 channel=2 rank=5 nibble_mask=a59c bank_group=2 bank=4 row=13 column=23 sub_channel=0 comp_id=00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 comp_id_pldm_valid_flags='' pldm_entity_id=0x00 pldm_resource_id=0x00 Note: For memory sparing event record, fields 'maintenance operation class' and 'maintenance operation subclass' are defined twice, first in the common event record (Table 8-55) and second in the memory sparing event record (Table 8-60). Thus those in the sparing event record coded as reserved, to be removed when the spec is updated. Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Signed-off-by: Shiju Jose <shiju.jose@huawei.com> Link: https://patch.msgid.link/20250717101817.2104-5-shiju.jose@huawei.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-07-17 11:18:17 +01:00
if (event_type == CXL_CPER_EVENT_MEM_SPARING) {
trace_cxl_memory_sparing(cxlmd, type, &evt->mem_sparing);
return;
}
if (trace_cxl_general_media_enabled() || trace_cxl_dram_enabled()) {
u64 dpa, hpa = ULLONG_MAX, hpa_alias = ULLONG_MAX;
struct cxl_region *cxlr;
/*
* These trace points are annotated with HPA and region
* translations. Take topology mutation locks and lookup
* { HPA, REGION } from { DPA, MEMDEV } in the event record.
*/
cxl: Convert to ACQUIRE() for conditional rwsem locking Use ACQUIRE() to cleanup conditional locking paths in the CXL driver The ACQUIRE() macro and its associated ACQUIRE_ERR() helpers, like scoped_cond_guard(), arrange for scoped-based conditional locking. Unlike scoped_cond_guard(), these macros arrange for an ERR_PTR() to be retrieved representing the state of the conditional lock. The goal of this conversion is to complete the removal of all explicit unlock calls in the subsystem. I.e. the methods to acquire a lock are solely via guard(), scoped_guard() (for limited cases), or ACQUIRE(). All unlock is implicit / scope-based. In order to make sure all lock sites are converted, the existing rwsem's are consolidated and renamed in 'struct cxl_rwsem'. While that makes the patch noisier it gives a clean cut-off between old-world (explicit unlock allowed), and new world (explicit unlock deleted). Cc: David Lechner <dlechner@baylibre.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Jonathan Cameron <jonathan.cameron@huawei.com> Cc: Dave Jiang <dave.jiang@intel.com> Cc: Alison Schofield <alison.schofield@intel.com> Cc: Vishal Verma <vishal.l.verma@intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Shiju Jose <shiju.jose@huawei.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Tested-by: Shiju Jose <shiju.jose@huawei.com> Link: https://patch.msgid.link/20250711234932.671292-9-dan.j.williams@intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-07-11 16:49:32 -07:00
guard(rwsem_read)(&cxl_rwsem.region);
guard(rwsem_read)(&cxl_rwsem.dpa);
dpa = le64_to_cpu(evt->media_hdr.phys_addr) & CXL_DPA_MASK;
cxlr = cxl_dpa_to_region(cxlmd, dpa);
if (cxlr) {
u64 cache_size = cxlr->params.cache_size;
hpa = cxl_dpa_to_hpa(cxlr, cxlmd, dpa);
if (cache_size)
hpa_alias = hpa - cache_size;
}
cxl/edac: Support for finding memory operation attributes from the current boot Certain operations on memory, such as memory repair, are permitted only when the address and other attributes for the operation are from the current boot. This is determined by checking whether the memory attributes for the operation match those in the CXL gen_media or CXL DRAM memory event records reported during the current boot. The CXL event records must be backed up because they are cleared in the hardware after being processed by the kernel. Support is added for storing CXL gen_media or CXL DRAM memory event records in xarrays. Old records are deleted when they expire or when there is an overflow and which depends on platform correctly report Event Record Timestamp field of CXL spec Table 8-55 Common Event Record Format. Additionally, helper functions are implemented to find a matching record in the xarray storage based on the memory attributes and repair type. Add validity check, when matching attributes for sparing, using the validity flag in the DRAM event record, to ensure that all required attributes for a requested repair operation are valid and set. Reviewed-by: Dave Jiang <dave.jiang@intel.com> Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Shiju Jose <shiju.jose@huawei.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Acked-by: Dan Williams <dan.j.williams@intel.com> Link: https://patch.msgid.link/20250521124749.817-7-shiju.jose@huawei.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-05-21 13:47:44 +01:00
if (event_type == CXL_CPER_EVENT_GEN_MEDIA) {
if (cxl_store_rec_gen_media((struct cxl_memdev *)cxlmd, evt))
dev_dbg(&cxlmd->dev, "CXL store rec_gen_media failed\n");
if (evt->gen_media.media_hdr.descriptor &
CXL_GMER_EVT_DESC_THRESHOLD_EVENT)
WARN_ON_ONCE((evt->gen_media.media_hdr.type &
CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE) &&
!get_unaligned_le24(evt->gen_media.cme_count));
else
WARN_ON_ONCE(evt->gen_media.media_hdr.type &
CXL_GMER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE);
trace_cxl_general_media(cxlmd, type, cxlr, hpa,
hpa_alias, &evt->gen_media);
cxl/edac: Support for finding memory operation attributes from the current boot Certain operations on memory, such as memory repair, are permitted only when the address and other attributes for the operation are from the current boot. This is determined by checking whether the memory attributes for the operation match those in the CXL gen_media or CXL DRAM memory event records reported during the current boot. The CXL event records must be backed up because they are cleared in the hardware after being processed by the kernel. Support is added for storing CXL gen_media or CXL DRAM memory event records in xarrays. Old records are deleted when they expire or when there is an overflow and which depends on platform correctly report Event Record Timestamp field of CXL spec Table 8-55 Common Event Record Format. Additionally, helper functions are implemented to find a matching record in the xarray storage based on the memory attributes and repair type. Add validity check, when matching attributes for sparing, using the validity flag in the DRAM event record, to ensure that all required attributes for a requested repair operation are valid and set. Reviewed-by: Dave Jiang <dave.jiang@intel.com> Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Shiju Jose <shiju.jose@huawei.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Acked-by: Dan Williams <dan.j.williams@intel.com> Link: https://patch.msgid.link/20250521124749.817-7-shiju.jose@huawei.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-05-21 13:47:44 +01:00
} else if (event_type == CXL_CPER_EVENT_DRAM) {
if (cxl_store_rec_dram((struct cxl_memdev *)cxlmd, evt))
dev_dbg(&cxlmd->dev, "CXL store rec_dram failed\n");
if (evt->dram.media_hdr.descriptor &
CXL_GMER_EVT_DESC_THRESHOLD_EVENT)
WARN_ON_ONCE((evt->dram.media_hdr.type &
CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE) &&
!get_unaligned_le24(evt->dram.cvme_count));
else
WARN_ON_ONCE(evt->dram.media_hdr.type &
CXL_DER_MEM_EVT_TYPE_AP_CME_COUNTER_EXPIRE);
trace_cxl_dram(cxlmd, type, cxlr, hpa, hpa_alias,
&evt->dram);
cxl/edac: Support for finding memory operation attributes from the current boot Certain operations on memory, such as memory repair, are permitted only when the address and other attributes for the operation are from the current boot. This is determined by checking whether the memory attributes for the operation match those in the CXL gen_media or CXL DRAM memory event records reported during the current boot. The CXL event records must be backed up because they are cleared in the hardware after being processed by the kernel. Support is added for storing CXL gen_media or CXL DRAM memory event records in xarrays. Old records are deleted when they expire or when there is an overflow and which depends on platform correctly report Event Record Timestamp field of CXL spec Table 8-55 Common Event Record Format. Additionally, helper functions are implemented to find a matching record in the xarray storage based on the memory attributes and repair type. Add validity check, when matching attributes for sparing, using the validity flag in the DRAM event record, to ensure that all required attributes for a requested repair operation are valid and set. Reviewed-by: Dave Jiang <dave.jiang@intel.com> Co-developed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Shiju Jose <shiju.jose@huawei.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Acked-by: Dan Williams <dan.j.williams@intel.com> Link: https://patch.msgid.link/20250521124749.817-7-shiju.jose@huawei.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-05-21 13:47:44 +01:00
}
}
}
EXPORT_SYMBOL_NS_GPL(cxl_event_trace_record, "CXL");
static void __cxl_event_trace_record(const struct cxl_memdev *cxlmd,
enum cxl_event_log_type type,
struct cxl_event_record_raw *record)
{
enum cxl_event_type ev_type = CXL_CPER_EVENT_GENERIC;
const uuid_t *uuid = &record->id;
if (uuid_equal(uuid, &CXL_EVENT_GEN_MEDIA_UUID))
ev_type = CXL_CPER_EVENT_GEN_MEDIA;
else if (uuid_equal(uuid, &CXL_EVENT_DRAM_UUID))
ev_type = CXL_CPER_EVENT_DRAM;
else if (uuid_equal(uuid, &CXL_EVENT_MEM_MODULE_UUID))
ev_type = CXL_CPER_EVENT_MEM_MODULE;
cxl/events: Trace Memory Sparing Event Record CXL rev 3.2 section 8.2.10.2.1.4 Table 8-60 defines the Memory Sparing Event Record. Determine if the event read is memory sparing record and if so trace the record. Memory device shall produce a memory sparing event record 1. After completion of a PPR maintenance operation if the memory sparing event record enable bit is set (Field: sPPR/hPPR Operation Mode in Table 8-128/Table 8-131). 2. In response to a query request by the host (see section 8.2.10.7.1.4) to determine the availability of sparing resources. The device shall report the resource availability by producing the Memory Sparing Event Record (see Table 8-60) in which the channel, rank, nibble mask, bank group, bank, row, column, sub-channel fields are a copy of the values specified in the request. If the controller does not support reporting whether a resource is available, and a perform maintenance operation for memory sparing is issued with query resources set to 1, the controller shall return invalid input. Example trace log for produce memory sparing event record on completion of a soft PPR operation, cxl_memory_sparing: memdev=mem1 host=0000:0f:00.0 serial=3 log=Informational : time=55045163029 uuid=e71f3a40-2d29-4092-8a39-4d1c966c7c65 len=128 flags='0x1' handle=1 related_handle=0 maint_op_class=2 maint_op_sub_class=1 ld_id=0 head_id=0 : flags='' result=0 validity_flags='CHANNEL|RANK|NIBBLE|BANK GROUP|BANK|ROW|COLUMN' spare resource avail=1 channel=2 rank=5 nibble_mask=a59c bank_group=2 bank=4 row=13 column=23 sub_channel=0 comp_id=00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 comp_id_pldm_valid_flags='' pldm_entity_id=0x00 pldm_resource_id=0x00 Note: For memory sparing event record, fields 'maintenance operation class' and 'maintenance operation subclass' are defined twice, first in the common event record (Table 8-55) and second in the memory sparing event record (Table 8-60). Thus those in the sparing event record coded as reserved, to be removed when the spec is updated. Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Signed-off-by: Shiju Jose <shiju.jose@huawei.com> Link: https://patch.msgid.link/20250717101817.2104-5-shiju.jose@huawei.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-07-17 11:18:17 +01:00
else if (uuid_equal(uuid, &CXL_EVENT_MEM_SPARING_UUID))
ev_type = CXL_CPER_EVENT_MEM_SPARING;
cxl_event_trace_record(cxlmd, type, ev_type, uuid, &record->event);
}
static int cxl_clear_event_record(struct cxl_memdev_state *mds,
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
enum cxl_event_log_type log,
struct cxl_get_event_payload *get_pl)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
struct cxl_mbox_clear_event_payload *payload;
u16 total = le16_to_cpu(get_pl->record_count);
u8 max_handles = CXL_CLEAR_EVENT_MAX_HANDLES;
size_t pl_size = struct_size(payload, handles, max_handles);
struct cxl_mbox_cmd mbox_cmd;
u16 cnt;
int rc = 0;
int i;
/* Payload size may limit the max handles */
if (pl_size > cxl_mbox->payload_size) {
max_handles = (cxl_mbox->payload_size - sizeof(*payload)) /
sizeof(__le16);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
pl_size = struct_size(payload, handles, max_handles);
}
payload = kvzalloc(pl_size, GFP_KERNEL);
if (!payload)
return -ENOMEM;
*payload = (struct cxl_mbox_clear_event_payload) {
.event_log = log,
};
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_CLEAR_EVENT_RECORD,
.payload_in = payload,
.size_in = pl_size,
};
/*
* Clear Event Records uses u8 for the handle cnt while Get Event
* Record can return up to 0xffff records.
*/
i = 0;
for (cnt = 0; cnt < total; cnt++) {
struct cxl_event_record_raw *raw = &get_pl->records[cnt];
struct cxl_event_generic *gen = &raw->event.generic;
payload->handles[i++] = gen->hdr.handle;
dev_dbg(mds->cxlds.dev, "Event log '%d': Clearing %u\n", log,
le16_to_cpu(payload->handles[i - 1]));
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
if (i == max_handles) {
payload->nr_recs = i;
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
if (rc)
goto free_pl;
i = 0;
}
}
/* Clear what is left if any */
if (i) {
payload->nr_recs = i;
mbox_cmd.size_in = struct_size(payload, handles, i);
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
if (rc)
goto free_pl;
}
free_pl:
kvfree(payload);
return rc;
}
static void cxl_mem_get_records_log(struct cxl_memdev_state *mds,
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
enum cxl_event_log_type type)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_memdev *cxlmd = mds->cxlds.cxlmd;
struct device *dev = mds->cxlds.dev;
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
struct cxl_get_event_payload *payload;
u8 log_type = type;
u16 nr_rec;
mutex_lock(&mds->event.log_lock);
payload = mds->event.buf;
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
do {
int rc, i;
cxl/core: Fix potential payload size confusion in cxl_mem_get_poison() A recent change to cxl_mem_get_records_log() [1] highlighted a subtle nuance of looping calls to cxl_internal_send_cmd(), i.e. that cxl_internal_send_cmd() modifies the 'size_out' member of the @mbox_cmd argument. That mechanism is useful for communicating underflow, but it is unwanted when reusing @mbox_cmd for a subsequent submission. It turns out that cxl_xfer_log() avoids this scenario by always redefining @mbox_cmd each iteration. Update cxl_mem_get_records_log() and cxl_mem_get_poison() to follow the same style as cxl_xfer_log(), i.e. re-define @mbox_cmd each iteration. The cxl_mem_get_records_log() change is just a style fixup, but the cxl_mem_get_poison() change is a potential fix, per Alison [2]: Poison list retrieval can hit this case if the MORE flag is set and a follow on read of the list delivers more records than the previous read. ie. device gives one record, sets the _MORE flag, then gives 5. Not an urgent fix since this behavior has not been seen in the wild, but worth tracking as a fix. Cc: Kwangjin Ko <kwangjin.ko@sk.com> Cc: Alison Schofield <alison.schofield@intel.com> Fixes: ed83f7ca398b ("cxl/mbox: Add GET_POISON_LIST mailbox command") Link: http://lore.kernel.org/r/20240402081404.1106-2-kwangjin.ko@sk.com [1] Link: http://lore.kernel.org/r/ZhAhAL/GOaWFrauw@aschofie-mobl2 [2] Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Link: https://lore.kernel.org/r/171235441633.2716581.12330082428680958635.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2024-04-05 15:00:16 -07:00
struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_EVENT_RECORD,
.payload_in = &log_type,
.size_in = sizeof(log_type),
.payload_out = payload,
.size_out = cxl_mbox->payload_size,
cxl/core: Fix potential payload size confusion in cxl_mem_get_poison() A recent change to cxl_mem_get_records_log() [1] highlighted a subtle nuance of looping calls to cxl_internal_send_cmd(), i.e. that cxl_internal_send_cmd() modifies the 'size_out' member of the @mbox_cmd argument. That mechanism is useful for communicating underflow, but it is unwanted when reusing @mbox_cmd for a subsequent submission. It turns out that cxl_xfer_log() avoids this scenario by always redefining @mbox_cmd each iteration. Update cxl_mem_get_records_log() and cxl_mem_get_poison() to follow the same style as cxl_xfer_log(), i.e. re-define @mbox_cmd each iteration. The cxl_mem_get_records_log() change is just a style fixup, but the cxl_mem_get_poison() change is a potential fix, per Alison [2]: Poison list retrieval can hit this case if the MORE flag is set and a follow on read of the list delivers more records than the previous read. ie. device gives one record, sets the _MORE flag, then gives 5. Not an urgent fix since this behavior has not been seen in the wild, but worth tracking as a fix. Cc: Kwangjin Ko <kwangjin.ko@sk.com> Cc: Alison Schofield <alison.schofield@intel.com> Fixes: ed83f7ca398b ("cxl/mbox: Add GET_POISON_LIST mailbox command") Link: http://lore.kernel.org/r/20240402081404.1106-2-kwangjin.ko@sk.com [1] Link: http://lore.kernel.org/r/ZhAhAL/GOaWFrauw@aschofie-mobl2 [2] Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Link: https://lore.kernel.org/r/171235441633.2716581.12330082428680958635.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2024-04-05 15:00:16 -07:00
.min_out = struct_size(payload, records, 0),
};
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
if (rc) {
dev_err_ratelimited(dev,
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
"Event log '%d': Failed to query event records : %d",
type, rc);
break;
}
nr_rec = le16_to_cpu(payload->record_count);
if (!nr_rec)
break;
for (i = 0; i < nr_rec; i++)
__cxl_event_trace_record(cxlmd, type,
&payload->records[i]);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
if (payload->flags & CXL_GET_EVENT_FLAG_OVERFLOW)
trace_cxl_overflow(cxlmd, type, payload);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
rc = cxl_clear_event_record(mds, type, payload);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
if (rc) {
dev_err_ratelimited(dev,
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
"Event log '%d': Failed to clear events : %d",
type, rc);
break;
}
} while (nr_rec);
mutex_unlock(&mds->event.log_lock);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
}
/**
* cxl_mem_get_event_records - Get Event Records from the device
* @mds: The driver data for the operation
* @status: Event Status register value identifying which events are available.
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
*
* Retrieve all event records available on the device, report them as trace
* events, and clear them.
*
* See CXL rev 3.0 @8.2.9.2.2 Get Event Records
* See CXL rev 3.0 @8.2.9.2.3 Clear Event Records
*/
void cxl_mem_get_event_records(struct cxl_memdev_state *mds, u32 status)
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
{
dev_dbg(mds->cxlds.dev, "Reading event logs: %x\n", status);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
if (status & CXLDEV_EVENT_STATUS_FATAL)
cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_FATAL);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
if (status & CXLDEV_EVENT_STATUS_FAIL)
cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_FAIL);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
if (status & CXLDEV_EVENT_STATUS_WARN)
cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_WARN);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
if (status & CXLDEV_EVENT_STATUS_INFO)
cxl_mem_get_records_log(mds, CXL_EVENT_TYPE_INFO);
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_get_event_records, "CXL");
cxl/mem: Read, trace, and clear events on driver load CXL devices have multiple event logs which can be queried for CXL event records. Devices are required to support the storage of at least one event record in each event log type. Devices track event log overflow by incrementing a counter and tracking the time of the first and last overflow event seen. Software queries events via the Get Event Record mailbox command; CXL rev 3.0 section 8.2.9.2.2 and clears events via CXL rev 3.0 section 8.2.9.2.3 Clear Event Records mailbox command. If the result of negotiating CXL Error Reporting Control is OS control, read and clear all event logs on driver load. Ensure a clean slate of events by reading and clearing the events on driver load. The status register is not used because a device may continue to trigger events and the only requirement is to empty the log at least once. This allows for the required transition from empty to non-empty for interrupt generation. Handling of interrupts is in a follow on patch. The device can return up to 1MB worth of event records per query. Allocate a shared large buffer to handle the max number of records based on the mailbox payload size. This patch traces a raw event record and leaves specific event record type tracing to subsequent patches. Macros are created to aid in tracing the common CXL Event header fields. Each record is cleared explicitly. A clear all bit is specified but is only valid when the log overflows. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Signed-off-by: Ira Weiny <ira.weiny@intel.com> Link: https://lore.kernel.org/r/20221216-cxl-ev-log-v7-1-2316a5c8f7d8@intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-01-17 21:53:36 -08:00
/**
* cxl_mem_get_partition_info - Get partition info
* @mds: The driver data for the operation
*
* Retrieve the current partition info for the device specified. The active
* values are the current capacity in bytes. If not 0, the 'next' values are
* the pending values, in bytes, which take affect on next cold reset.
*
* Return: 0 if no error: or the result of the mailbox command.
*
* See CXL @8.2.9.5.2.1 Get Partition Info
*/
static int cxl_mem_get_partition_info(struct cxl_memdev_state *mds)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_partition_info pi;
struct cxl_mbox_cmd mbox_cmd;
int rc;
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_PARTITION_INFO,
.size_out = sizeof(pi),
.payload_out = &pi,
};
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc)
return rc;
mds->active_volatile_bytes =
le64_to_cpu(pi.active_volatile_cap) * CXL_CAPACITY_MULTIPLIER;
mds->active_persistent_bytes =
le64_to_cpu(pi.active_persistent_cap) * CXL_CAPACITY_MULTIPLIER;
return 0;
}
/**
* cxl_dev_state_identify() - Send the IDENTIFY command to the device.
* @mds: The driver data for the operation
*
* Return: 0 if identify was executed successfully or media not ready.
*
* This will dispatch the identify command to the device and on success populate
* structures to be exported to sysfs.
*/
int cxl_dev_state_identify(struct cxl_memdev_state *mds)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
/* See CXL 2.0 Table 175 Identify Memory Device Output Payload */
struct cxl_mbox_identify id;
struct cxl_mbox_cmd mbox_cmd;
u32 val;
int rc;
if (!mds->cxlds.media_ready)
return 0;
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_IDENTIFY,
.size_out = sizeof(id),
.payload_out = &id,
};
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0)
return rc;
mds->total_bytes =
le64_to_cpu(id.total_capacity) * CXL_CAPACITY_MULTIPLIER;
mds->volatile_only_bytes =
le64_to_cpu(id.volatile_capacity) * CXL_CAPACITY_MULTIPLIER;
mds->persistent_only_bytes =
le64_to_cpu(id.persistent_capacity) * CXL_CAPACITY_MULTIPLIER;
mds->partition_align_bytes =
le64_to_cpu(id.partition_align) * CXL_CAPACITY_MULTIPLIER;
mds->lsa_size = le32_to_cpu(id.lsa_size);
memcpy(mds->firmware_version, id.fw_revision,
sizeof(id.fw_revision));
if (test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds)) {
val = get_unaligned_le24(id.poison_list_max_mer);
mds->poison.max_errors = min_t(u32, val, CXL_POISON_LIST_MAX);
}
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_dev_state_identify, "CXL");
static int __cxl_mem_sanitize(struct cxl_memdev_state *mds, u16 cmd)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
int rc;
u32 sec_out = 0;
struct cxl_get_security_output {
__le32 flags;
} out;
struct cxl_mbox_cmd sec_cmd = {
.opcode = CXL_MBOX_OP_GET_SECURITY_STATE,
.payload_out = &out,
.size_out = sizeof(out),
};
struct cxl_mbox_cmd mbox_cmd = { .opcode = cmd };
if (cmd != CXL_MBOX_OP_SANITIZE && cmd != CXL_MBOX_OP_SECURE_ERASE)
return -EINVAL;
rc = cxl_internal_send_cmd(cxl_mbox, &sec_cmd);
if (rc < 0) {
dev_err(cxl_mbox->host, "Failed to get security state : %d", rc);
return rc;
}
/*
* Prior to using these commands, any security applied to
* the user data areas of the device shall be DISABLED (or
* UNLOCKED for secure erase case).
*/
sec_out = le32_to_cpu(out.flags);
if (sec_out & CXL_PMEM_SEC_STATE_USER_PASS_SET)
return -EINVAL;
if (cmd == CXL_MBOX_OP_SECURE_ERASE &&
sec_out & CXL_PMEM_SEC_STATE_LOCKED)
return -EINVAL;
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc < 0) {
dev_err(cxl_mbox->host, "Failed to sanitize device : %d", rc);
return rc;
}
return 0;
}
/**
* cxl_mem_sanitize() - Send a sanitization command to the device.
* @cxlmd: The device for the operation
* @cmd: The specific sanitization command opcode
*
* Return: 0 if the command was executed successfully, regardless of
* whether or not the actual security operation is done in the background,
* such as for the Sanitize case.
* Error return values can be the result of the mailbox command, -EINVAL
* when security requirements are not met or invalid contexts, or -EBUSY
* if the sanitize operation is already in flight.
*
* See CXL 3.0 @8.2.9.8.5.1 Sanitize and @8.2.9.8.5.2 Secure Erase.
*/
int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd)
{
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_port *endpoint;
/* synchronize with cxl_mem_probe() and decoder write operations */
guard(device)(&cxlmd->dev);
endpoint = cxlmd->endpoint;
cxl: Convert to ACQUIRE() for conditional rwsem locking Use ACQUIRE() to cleanup conditional locking paths in the CXL driver The ACQUIRE() macro and its associated ACQUIRE_ERR() helpers, like scoped_cond_guard(), arrange for scoped-based conditional locking. Unlike scoped_cond_guard(), these macros arrange for an ERR_PTR() to be retrieved representing the state of the conditional lock. The goal of this conversion is to complete the removal of all explicit unlock calls in the subsystem. I.e. the methods to acquire a lock are solely via guard(), scoped_guard() (for limited cases), or ACQUIRE(). All unlock is implicit / scope-based. In order to make sure all lock sites are converted, the existing rwsem's are consolidated and renamed in 'struct cxl_rwsem'. While that makes the patch noisier it gives a clean cut-off between old-world (explicit unlock allowed), and new world (explicit unlock deleted). Cc: David Lechner <dlechner@baylibre.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Ingo Molnar <mingo@kernel.org> Cc: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com> Cc: Davidlohr Bueso <dave@stgolabs.net> Cc: Jonathan Cameron <jonathan.cameron@huawei.com> Cc: Dave Jiang <dave.jiang@intel.com> Cc: Alison Schofield <alison.schofield@intel.com> Cc: Vishal Verma <vishal.l.verma@intel.com> Cc: Ira Weiny <ira.weiny@intel.com> Cc: Shiju Jose <shiju.jose@huawei.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <jonathan.cameron@huawei.com> Reviewed-by: Fabio M. De Francesco <fabio.m.de.francesco@linux.intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Tested-by: Shiju Jose <shiju.jose@huawei.com> Link: https://patch.msgid.link/20250711234932.671292-9-dan.j.williams@intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-07-11 16:49:32 -07:00
guard(rwsem_read)(&cxl_rwsem.region);
/*
* Require an endpoint to be safe otherwise the driver can not
* be sure that the device is unmapped.
*/
if (endpoint && cxl_num_decoders_committed(endpoint) == 0)
return __cxl_mem_sanitize(mds, cmd);
return -EBUSY;
}
cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Alejandro Lucero <alucerop@amd.com> Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-02-03 20:24:18 -08:00
static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode)
{
cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Alejandro Lucero <alucerop@amd.com> Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-02-03 20:24:18 -08:00
int i = info->nr_partitions;
cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Alejandro Lucero <alucerop@amd.com> Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-02-03 20:24:18 -08:00
if (size == 0)
return;
cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Alejandro Lucero <alucerop@amd.com> Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-02-03 20:24:18 -08:00
info->part[i].range = (struct range) {
.start = start,
.end = start + size - 1,
};
info->part[i].mode = mode;
info->nr_partitions++;
}
cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Alejandro Lucero <alucerop@amd.com> Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-02-03 20:24:18 -08:00
int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info)
{
struct cxl_dev_state *cxlds = &mds->cxlds;
struct device *dev = cxlds->dev;
int rc;
if (!cxlds->media_ready) {
cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Alejandro Lucero <alucerop@amd.com> Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-02-03 20:24:18 -08:00
info->size = 0;
return 0;
}
cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Alejandro Lucero <alucerop@amd.com> Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-02-03 20:24:18 -08:00
info->size = mds->total_bytes;
if (mds->partition_align_bytes == 0) {
cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Alejandro Lucero <alucerop@amd.com> Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-02-03 20:24:18 -08:00
add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM);
add_part(info, mds->volatile_only_bytes,
mds->persistent_only_bytes, CXL_PARTMODE_PMEM);
return 0;
}
rc = cxl_mem_get_partition_info(mds);
if (rc) {
dev_err(dev, "Failed to query partition information\n");
return rc;
}
cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Alejandro Lucero <alucerop@amd.com> Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-02-03 20:24:18 -08:00
add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM);
add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes,
CXL_PARTMODE_PMEM);
return 0;
}
cxl: Introduce 'struct cxl_dpa_partition' and 'struct cxl_range_info' The pending efforts to add CXL Accelerator (type-2) device [1], and Dynamic Capacity (DCD) support [2], tripped on the no-longer-fit-for-purpose design in the CXL subsystem for tracking device-physical-address (DPA) metadata. Trip hazards include: - CXL Memory Devices need to consider a PMEM partition, but Accelerator devices with CXL.mem likely do not in the common case. - CXL Memory Devices enumerate DPA through Memory Device mailbox commands like Partition Info, Accelerators devices do not. - CXL Memory Devices that support DCD support more than 2 partitions. Some of the driver algorithms are awkward to expand to > 2 partition cases. - DPA performance data is a general capability that can be shared with accelerators, so tracking it in 'struct cxl_memdev_state' is no longer suitable. - Hardcoded assumptions around the PMEM partition always being index-1 if RAM is zero-sized or PMEM is zero sized. - 'enum cxl_decoder_mode' is sometimes a partition id and sometimes a memory property, it should be phased in favor of a partition id and the memory property comes from the partition info. Towards cleaning up those issues and allowing a smoother landing for the aforementioned pending efforts, introduce a 'struct cxl_dpa_partition' array to 'struct cxl_dev_state', and 'struct cxl_range_info' as a shared way for Memory Devices and Accelerators to initialize the DPA information in 'struct cxl_dev_state'. For now, split a new cxl_dpa_setup() from cxl_mem_create_range_info() to get the new data structure initialized, and cleanup some qos_class init. Follow on patches will go further to use the new data structure to cleanup algorithms that are better suited to loop over all possible partitions. cxl_dpa_setup() follows the locking expectations of mutating the device DPA map, and is suitable for Accelerator drivers to use. Accelerators likely only have one hardcoded 'ram' partition to convey to the cxl_core. Link: http://lore.kernel.org/20241230214445.27602-1-alejandro.lucero-palau@amd.com [1] Link: http://lore.kernel.org/20241210-dcd-type2-upstream-v8-0-812852504400@intel.com [2] Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Reviewed-by: Alejandro Lucero <alucerop@amd.com> Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Tested-by: Alejandro Lucero <alucerop@amd.com> Link: https://patch.msgid.link/173864305827.668823.13978794102080021276.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2025-02-03 20:24:18 -08:00
EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL");
int cxl_get_dirty_count(struct cxl_memdev_state *mds, u32 *count)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_get_health_info_out hi;
struct cxl_mbox_cmd mbox_cmd;
int rc;
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_GET_HEALTH_INFO,
.size_out = sizeof(hi),
.payload_out = &hi,
};
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (!rc)
*count = le32_to_cpu(hi.dirty_shutdown_cnt);
return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_get_dirty_count, "CXL");
int cxl_arm_dirty_shutdown(struct cxl_memdev_state *mds)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_cmd mbox_cmd;
struct cxl_mbox_set_shutdown_state_in in = {
.state = 1
};
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_SET_SHUTDOWN_STATE,
.size_in = sizeof(in),
.payload_in = &in,
};
return cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
}
EXPORT_SYMBOL_NS_GPL(cxl_arm_dirty_shutdown, "CXL");
int cxl_set_timestamp(struct cxl_memdev_state *mds)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
struct cxl_mbox_cmd mbox_cmd;
struct cxl_mbox_set_timestamp_in pi;
int rc;
pi.timestamp = cpu_to_le64(ktime_get_real_ns());
mbox_cmd = (struct cxl_mbox_cmd) {
.opcode = CXL_MBOX_OP_SET_TIMESTAMP,
.size_in = sizeof(pi),
.payload_in = &pi,
};
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
/*
* Command is optional. Devices may have another way of providing
* a timestamp, or may return all 0s in timestamp fields.
* Don't report an error if this command isn't supported
*/
if (rc && (mbox_cmd.return_code != CXL_MBOX_CMD_RC_UNSUPPORTED))
return rc;
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_set_timestamp, "CXL");
int cxl_mem_get_poison(struct cxl_memdev *cxlmd, u64 offset, u64 len,
struct cxl_region *cxlr)
{
struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
struct cxl_mailbox *cxl_mbox = &cxlmd->cxlds->cxl_mbox;
struct cxl_mbox_poison_out *po;
struct cxl_mbox_poison_in pi;
int nr_records = 0;
int rc;
ACQUIRE(mutex_intr, lock)(&mds->poison.mutex);
if ((rc = ACQUIRE_ERR(mutex_intr, &lock)))
return rc;
po = mds->poison.list_out;
pi.offset = cpu_to_le64(offset);
pi.length = cpu_to_le64(len / CXL_POISON_LEN_MULT);
do {
cxl/core: Fix potential payload size confusion in cxl_mem_get_poison() A recent change to cxl_mem_get_records_log() [1] highlighted a subtle nuance of looping calls to cxl_internal_send_cmd(), i.e. that cxl_internal_send_cmd() modifies the 'size_out' member of the @mbox_cmd argument. That mechanism is useful for communicating underflow, but it is unwanted when reusing @mbox_cmd for a subsequent submission. It turns out that cxl_xfer_log() avoids this scenario by always redefining @mbox_cmd each iteration. Update cxl_mem_get_records_log() and cxl_mem_get_poison() to follow the same style as cxl_xfer_log(), i.e. re-define @mbox_cmd each iteration. The cxl_mem_get_records_log() change is just a style fixup, but the cxl_mem_get_poison() change is a potential fix, per Alison [2]: Poison list retrieval can hit this case if the MORE flag is set and a follow on read of the list delivers more records than the previous read. ie. device gives one record, sets the _MORE flag, then gives 5. Not an urgent fix since this behavior has not been seen in the wild, but worth tracking as a fix. Cc: Kwangjin Ko <kwangjin.ko@sk.com> Cc: Alison Schofield <alison.schofield@intel.com> Fixes: ed83f7ca398b ("cxl/mbox: Add GET_POISON_LIST mailbox command") Link: http://lore.kernel.org/r/20240402081404.1106-2-kwangjin.ko@sk.com [1] Link: http://lore.kernel.org/r/ZhAhAL/GOaWFrauw@aschofie-mobl2 [2] Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Link: https://lore.kernel.org/r/171235441633.2716581.12330082428680958635.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2024-04-05 15:00:16 -07:00
struct cxl_mbox_cmd mbox_cmd = (struct cxl_mbox_cmd){
.opcode = CXL_MBOX_OP_GET_POISON,
.size_in = sizeof(pi),
.payload_in = &pi,
.size_out = cxl_mbox->payload_size,
cxl/core: Fix potential payload size confusion in cxl_mem_get_poison() A recent change to cxl_mem_get_records_log() [1] highlighted a subtle nuance of looping calls to cxl_internal_send_cmd(), i.e. that cxl_internal_send_cmd() modifies the 'size_out' member of the @mbox_cmd argument. That mechanism is useful for communicating underflow, but it is unwanted when reusing @mbox_cmd for a subsequent submission. It turns out that cxl_xfer_log() avoids this scenario by always redefining @mbox_cmd each iteration. Update cxl_mem_get_records_log() and cxl_mem_get_poison() to follow the same style as cxl_xfer_log(), i.e. re-define @mbox_cmd each iteration. The cxl_mem_get_records_log() change is just a style fixup, but the cxl_mem_get_poison() change is a potential fix, per Alison [2]: Poison list retrieval can hit this case if the MORE flag is set and a follow on read of the list delivers more records than the previous read. ie. device gives one record, sets the _MORE flag, then gives 5. Not an urgent fix since this behavior has not been seen in the wild, but worth tracking as a fix. Cc: Kwangjin Ko <kwangjin.ko@sk.com> Cc: Alison Schofield <alison.schofield@intel.com> Fixes: ed83f7ca398b ("cxl/mbox: Add GET_POISON_LIST mailbox command") Link: http://lore.kernel.org/r/20240402081404.1106-2-kwangjin.ko@sk.com [1] Link: http://lore.kernel.org/r/ZhAhAL/GOaWFrauw@aschofie-mobl2 [2] Signed-off-by: Dan Williams <dan.j.williams@intel.com> Reviewed-by: Ira Weiny <ira.weiny@intel.com> Reviewed-by: Alison Schofield <alison.schofield@intel.com> Link: https://lore.kernel.org/r/171235441633.2716581.12330082428680958635.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dave Jiang <dave.jiang@intel.com>
2024-04-05 15:00:16 -07:00
.payload_out = po,
.min_out = struct_size(po, record, 0),
};
rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
if (rc)
break;
for (int i = 0; i < le16_to_cpu(po->count); i++)
trace_cxl_poison(cxlmd, cxlr, &po->record[i],
po->flags, po->overflow_ts,
CXL_POISON_TRACE_LIST);
/* Protect against an uncleared _FLAG_MORE */
nr_records = nr_records + le16_to_cpu(po->count);
if (nr_records >= mds->poison.max_errors) {
dev_dbg(&cxlmd->dev, "Max Error Records reached: %d\n",
nr_records);
break;
}
} while (po->flags & CXL_POISON_FLAG_MORE);
return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_mem_get_poison, "CXL");
static void free_poison_buf(void *buf)
{
kvfree(buf);
}
/* Get Poison List output buffer is protected by mds->poison.lock */
static int cxl_poison_alloc_buf(struct cxl_memdev_state *mds)
{
struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
mds->poison.list_out = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
if (!mds->poison.list_out)
return -ENOMEM;
return devm_add_action_or_reset(mds->cxlds.dev, free_poison_buf,
mds->poison.list_out);
}
int cxl_poison_state_init(struct cxl_memdev_state *mds)
{
int rc;
if (!test_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds))
return 0;
rc = cxl_poison_alloc_buf(mds);
if (rc) {
clear_bit(CXL_POISON_ENABLED_LIST, mds->poison.enabled_cmds);
return rc;
}
mutex_init(&mds->poison.mutex);
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_poison_state_init, "CXL");
int cxl_mailbox_init(struct cxl_mailbox *cxl_mbox, struct device *host)
{
if (!cxl_mbox || !host)
return -EINVAL;
cxl_mbox->host = host;
mutex_init(&cxl_mbox->mbox_mutex);
rcuwait_init(&cxl_mbox->mbox_wait);
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_mailbox_init, "CXL");
struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
{
struct cxl_memdev_state *mds;
2025-02-26 09:21:21 -07:00
int rc;
mds = devm_kzalloc(dev, sizeof(*mds), GFP_KERNEL);
if (!mds) {
dev_err(dev, "No memory available\n");
return ERR_PTR(-ENOMEM);
}
mutex_init(&mds->event.log_lock);
mds->cxlds.dev = dev;
mds->cxlds.reg_map.host = dev;
mds->cxlds.cxl_mbox.host = dev;
mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
cxl/memdev: Make mailbox functionality optional In support of the Linux CXL core scaling for a wider set of CXL devices, allow for the creation of memdevs with some memory device capabilities disabled. Specifically, allow for CXL devices outside of those claiming to be compliant with the generic CXL memory device class code, like vendor specific Type-2/3 devices that host CXL.mem. This implies, allow for the creation of memdevs that only support component-registers, not necessarily memory-device-registers (like mailbox registers). A memdev derived from a CXL endpoint that does not support generic class code expectations is tagged "CXL_DEVTYPE_DEVMEM", while a memdev derived from a class-code compliant endpoint is tagged "CXL_DEVTYPE_CLASSMEM". The primary assumption of a CXL_DEVTYPE_DEVMEM memdev is that it optionally may not host a mailbox. Disable the command passthrough ioctl for memdevs that are not CXL_DEVTYPE_CLASSMEM, and return empty strings from memdev attributes associated with data retrieved via the class-device-standard IDENTIFY command. Note that empty strings were chosen over attribute visibility to maintain compatibility with shipping versions of cxl-cli that expect those attributes to always be present. Once cxl-cli has dropped that requirement this workaround can be deprecated. Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> Reviewed-by: Dave Jiang <dave.jiang@intel.com> Link: https://lore.kernel.org/r/168679260782.3436160.7587293613945445365.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams <dan.j.williams@intel.com>
2023-06-14 18:30:07 -07:00
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
2025-02-26 09:21:21 -07:00
rc = devm_cxl_register_mce_notifier(dev, &mds->mce_notifier);
if (rc == -EOPNOTSUPP)
dev_warn(dev, "CXL MCE unsupported\n");
else if (rc)
2025-02-26 09:21:21 -07:00
return ERR_PTR(rc);
return mds;
}
EXPORT_SYMBOL_NS_GPL(cxl_memdev_state_create, "CXL");
void __init cxl_mbox_init(void)
{
struct dentry *mbox_debugfs;
mbox_debugfs = cxl_debugfs_create_dir("mbox");
debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
&cxl_raw_allow_all);
}