2019-03-11 14:55:59 -06:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
/*
|
|
|
|
* Copyright (c) 2019, Intel Corporation.
|
|
|
|
*
|
|
|
|
* Heterogeneous Memory Attributes Table (HMAT) representation
|
|
|
|
*
|
|
|
|
* This program parses and reports the platform's HMAT tables, and registers
|
|
|
|
* the applicable attributes with the node's interfaces.
|
|
|
|
*/
|
|
|
|
|
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
Memory that has been tagged EFI_MEMORY_SP, and has performance
properties described by the ACPI HMAT is expected to have an application
specific consumer.
Those consumers may want 100% of the memory capacity to be reserved from
any usage by the kernel. By default, with this enabling, a platform
device is created to represent this differentiated resource.
The device-dax "hmem" driver claims these devices by default and
provides an mmap interface for the target application. If the
administrator prefers, the hmem resource range can be made available to
the core-mm via the device-dax hotplug facility, kmem, to online the
memory with its own numa node.
This was tested with an emulated HMAT produced by qemu (with the pending
HMAT enabling patches), and "efi_fake_mem=8G@9G:0x40000" on the kernel
command line to mark the memory ranges associated with node2 and node3
as EFI_MEMORY_SP.
qemu numa configuration options:
-numa node,mem=4G,cpus=0-19,nodeid=0
-numa node,mem=4G,cpus=20-39,nodeid=1
-numa node,mem=4G,nodeid=2
-numa node,mem=4G,nodeid=3
-numa dist,src=0,dst=0,val=10
-numa dist,src=0,dst=1,val=21
-numa dist,src=0,dst=2,val=21
-numa dist,src=0,dst=3,val=21
-numa dist,src=1,dst=0,val=21
-numa dist,src=1,dst=1,val=10
-numa dist,src=1,dst=2,val=21
-numa dist,src=1,dst=3,val=21
-numa dist,src=2,dst=0,val=21
-numa dist,src=2,dst=1,val=21
-numa dist,src=2,dst=2,val=10
-numa dist,src=2,dst=3,val=21
-numa dist,src=3,dst=0,val=21
-numa dist,src=3,dst=1,val=21
-numa dist,src=3,dst=2,val=21
-numa dist,src=3,dst=3,val=10
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
Result:
[
{
"path":"\/platform\/hmem.1",
"id":1,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax1.0",
"size":"4.00 GiB (4.29 GB)"
}
]
},
{
"path":"\/platform\/hmem.0",
"id":0,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax0.0",
"size":"4.00 GiB (4.29 GB)"
}
]
}
]
[..]
240000000-43fffffff : Soft Reserved
240000000-33fffffff : hmem.0
240000000-33fffffff : dax0.0
340000000-43fffffff : hmem.1
340000000-43fffffff : dax1.0
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-11-06 17:43:55 -08:00
|
|
|
#define pr_fmt(fmt) "acpi/hmat: " fmt
|
|
|
|
|
2019-03-11 14:55:59 -06:00
|
|
|
#include <linux/acpi.h>
|
|
|
|
#include <linux/bitops.h>
|
|
|
|
#include <linux/device.h>
|
|
|
|
#include <linux/init.h>
|
|
|
|
#include <linux/list.h>
|
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
Memory that has been tagged EFI_MEMORY_SP, and has performance
properties described by the ACPI HMAT is expected to have an application
specific consumer.
Those consumers may want 100% of the memory capacity to be reserved from
any usage by the kernel. By default, with this enabling, a platform
device is created to represent this differentiated resource.
The device-dax "hmem" driver claims these devices by default and
provides an mmap interface for the target application. If the
administrator prefers, the hmem resource range can be made available to
the core-mm via the device-dax hotplug facility, kmem, to online the
memory with its own numa node.
This was tested with an emulated HMAT produced by qemu (with the pending
HMAT enabling patches), and "efi_fake_mem=8G@9G:0x40000" on the kernel
command line to mark the memory ranges associated with node2 and node3
as EFI_MEMORY_SP.
qemu numa configuration options:
-numa node,mem=4G,cpus=0-19,nodeid=0
-numa node,mem=4G,cpus=20-39,nodeid=1
-numa node,mem=4G,nodeid=2
-numa node,mem=4G,nodeid=3
-numa dist,src=0,dst=0,val=10
-numa dist,src=0,dst=1,val=21
-numa dist,src=0,dst=2,val=21
-numa dist,src=0,dst=3,val=21
-numa dist,src=1,dst=0,val=21
-numa dist,src=1,dst=1,val=10
-numa dist,src=1,dst=2,val=21
-numa dist,src=1,dst=3,val=21
-numa dist,src=2,dst=0,val=21
-numa dist,src=2,dst=1,val=21
-numa dist,src=2,dst=2,val=10
-numa dist,src=2,dst=3,val=21
-numa dist,src=3,dst=0,val=21
-numa dist,src=3,dst=1,val=21
-numa dist,src=3,dst=2,val=21
-numa dist,src=3,dst=3,val=10
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
Result:
[
{
"path":"\/platform\/hmem.1",
"id":1,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax1.0",
"size":"4.00 GiB (4.29 GB)"
}
]
},
{
"path":"\/platform\/hmem.0",
"id":0,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax0.0",
"size":"4.00 GiB (4.29 GB)"
}
]
}
]
[..]
240000000-43fffffff : Soft Reserved
240000000-33fffffff : hmem.0
240000000-33fffffff : dax0.0
340000000-43fffffff : hmem.1
340000000-43fffffff : dax1.0
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-11-06 17:43:55 -08:00
|
|
|
#include <linux/mm.h>
|
|
|
|
#include <linux/platform_device.h>
|
2019-03-11 14:56:03 -06:00
|
|
|
#include <linux/list_sort.h>
|
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
Memory that has been tagged EFI_MEMORY_SP, and has performance
properties described by the ACPI HMAT is expected to have an application
specific consumer.
Those consumers may want 100% of the memory capacity to be reserved from
any usage by the kernel. By default, with this enabling, a platform
device is created to represent this differentiated resource.
The device-dax "hmem" driver claims these devices by default and
provides an mmap interface for the target application. If the
administrator prefers, the hmem resource range can be made available to
the core-mm via the device-dax hotplug facility, kmem, to online the
memory with its own numa node.
This was tested with an emulated HMAT produced by qemu (with the pending
HMAT enabling patches), and "efi_fake_mem=8G@9G:0x40000" on the kernel
command line to mark the memory ranges associated with node2 and node3
as EFI_MEMORY_SP.
qemu numa configuration options:
-numa node,mem=4G,cpus=0-19,nodeid=0
-numa node,mem=4G,cpus=20-39,nodeid=1
-numa node,mem=4G,nodeid=2
-numa node,mem=4G,nodeid=3
-numa dist,src=0,dst=0,val=10
-numa dist,src=0,dst=1,val=21
-numa dist,src=0,dst=2,val=21
-numa dist,src=0,dst=3,val=21
-numa dist,src=1,dst=0,val=21
-numa dist,src=1,dst=1,val=10
-numa dist,src=1,dst=2,val=21
-numa dist,src=1,dst=3,val=21
-numa dist,src=2,dst=0,val=21
-numa dist,src=2,dst=1,val=21
-numa dist,src=2,dst=2,val=10
-numa dist,src=2,dst=3,val=21
-numa dist,src=3,dst=0,val=21
-numa dist,src=3,dst=1,val=21
-numa dist,src=3,dst=2,val=21
-numa dist,src=3,dst=3,val=10
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
Result:
[
{
"path":"\/platform\/hmem.1",
"id":1,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax1.0",
"size":"4.00 GiB (4.29 GB)"
}
]
},
{
"path":"\/platform\/hmem.0",
"id":0,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax0.0",
"size":"4.00 GiB (4.29 GB)"
}
]
}
]
[..]
240000000-43fffffff : Soft Reserved
240000000-33fffffff : hmem.0
240000000-33fffffff : dax0.0
340000000-43fffffff : hmem.1
340000000-43fffffff : dax1.0
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-11-06 17:43:55 -08:00
|
|
|
#include <linux/memregion.h>
|
2019-08-05 08:27:05 -06:00
|
|
|
#include <linux/memory.h>
|
|
|
|
#include <linux/mutex.h>
|
2019-03-11 14:55:59 -06:00
|
|
|
#include <linux/node.h>
|
|
|
|
#include <linux/sysfs.h>
|
2020-10-13 16:49:13 -07:00
|
|
|
#include <linux/dax.h>
|
2023-09-26 14:06:27 +08:00
|
|
|
#include <linux/memory-tiers.h>
|
2019-03-11 14:55:59 -06:00
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
/* HMAT table revision (1 or 2); some fields are interpreted differently per rev. */
static u8 hmat_revision;

/* Set via disable_hmat() before initcalls run; checked at parse time. */
static int hmat_disable __initdata;

/*
 * disable_hmat - Opt out of HMAT parsing (e.g. from early/command-line setup).
 *
 * Only meaningful during init; simply latches the disable flag.
 */
void __init disable_hmat(void)
{
	hmat_disable = 1;
}
|
2019-03-11 14:55:59 -06:00
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
/* All memory targets discovered from SRAT/HMAT, keyed by memory_pxm. */
static LIST_HEAD(targets);
/* All memory initiators (processors), keyed by processor_pxm. */
static LIST_HEAD(initiators);
/* Parsed HMAT locality (latency/bandwidth) structures. */
static LIST_HEAD(localities);

/* Serializes access to the target list and per-target coordinate updates. */
static DEFINE_MUTEX(target_lock);
|
2019-03-11 14:56:03 -06:00
|
|
|
|
|
|
|
/*
 * The defined enum order is used to prioritize attributes to break ties when
 * selecting the best performing node.
 */
enum locality_types {
	WRITE_LATENCY,
	READ_LATENCY,
	WRITE_BANDWIDTH,
	READ_BANDWIDTH,
};

/* Best-performing locality per attribute, indexed by enum locality_types. */
static struct memory_locality *localities_types[4];
|
|
|
|
|
2019-08-05 08:27:04 -06:00
|
|
|
/* One memory-side cache level reported by HMAT for a target. */
struct target_cache {
	struct list_head node;			/* link on memory_target.caches */
	struct node_cache_attrs cache_attrs;	/* size/associativity/write policy */
};
|
|
|
|
|
2023-12-21 15:02:43 -07:00
|
|
|
/*
 * Extra access classes beyond the generic ACCESS_COORDINATE_* ones, used to
 * stash generic-port (e.g. CXL host bridge) performance data on the target.
 */
enum {
	NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL = ACCESS_COORDINATE_MAX,
	NODE_ACCESS_CLASS_GENPORT_SINK_CPU,
	NODE_ACCESS_CLASS_MAX,
};
|
|
|
|
|
2019-03-11 14:56:03 -06:00
|
|
|
/* One proximity-domain memory target described by SRAT/HMAT. */
struct memory_target {
	struct list_head node;		/* link on the global targets list */
	unsigned int memory_pxm;	/* this target's proximity domain */
	unsigned int processor_pxm;	/* nearest initiator pxm, or PXM_INVAL */
	struct resource memregions;	/* parent of this target's address ranges */
	/* Access coordinates per class (generic + genport sink classes). */
	struct access_coordinate coord[NODE_ACCESS_CLASS_MAX];
	struct list_head caches;	/* list of struct target_cache */
	struct node_cache_attrs cache_attrs;	/* scratch attrs while parsing */
	/* Raw ACPI device handle when the target is a generic port. */
	u8 gen_port_device_handle[ACPI_SRAT_DEVICE_HANDLE_SIZE];
	bool registered;		/* sysfs/node registration done */
	bool ext_updated; /* externally updated */
};
|
|
|
|
|
|
|
|
/* One processor (initiator) proximity domain. */
struct memory_initiator {
	struct list_head node;		/* link on the global initiators list */
	unsigned int processor_pxm;	/* initiator's proximity domain */
	bool has_cpu;			/* node has onlined CPUs (N_CPU state) */
};
|
|
|
|
|
|
|
|
/* Wrapper linking a raw HMAT locality structure into the localities list. */
struct memory_locality {
	struct list_head node;			/* link on the localities list */
	struct acpi_hmat_locality *hmat_loc;	/* points into the ACPI table */
};
|
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
static struct memory_initiator *find_mem_initiator(unsigned int cpu_pxm)
|
2019-03-11 14:56:03 -06:00
|
|
|
{
|
|
|
|
struct memory_initiator *initiator;
|
|
|
|
|
|
|
|
list_for_each_entry(initiator, &initiators, node)
|
|
|
|
if (initiator->processor_pxm == cpu_pxm)
|
|
|
|
return initiator;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
static struct memory_target *find_mem_target(unsigned int mem_pxm)
|
2019-03-11 14:56:03 -06:00
|
|
|
{
|
|
|
|
struct memory_target *target;
|
|
|
|
|
|
|
|
list_for_each_entry(target, &targets, node)
|
|
|
|
if (target->memory_pxm == mem_pxm)
|
|
|
|
return target;
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2025-02-26 09:21:19 -07:00
|
|
|
/**
|
|
|
|
* hmat_get_extended_linear_cache_size - Retrieve the extended linear cache size
|
|
|
|
* @backing_res: resource from the backing media
|
|
|
|
* @nid: node id for the memory region
|
|
|
|
* @cache_size: (Output) size of extended linear cache.
|
|
|
|
*
|
|
|
|
* Return: 0 on success. Errno on failure.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
int hmat_get_extended_linear_cache_size(struct resource *backing_res, int nid,
|
|
|
|
resource_size_t *cache_size)
|
|
|
|
{
|
|
|
|
unsigned int pxm = node_to_pxm(nid);
|
|
|
|
struct memory_target *target;
|
|
|
|
struct target_cache *tcache;
|
|
|
|
struct resource *res;
|
|
|
|
|
|
|
|
target = find_mem_target(pxm);
|
|
|
|
if (!target)
|
|
|
|
return -ENOENT;
|
|
|
|
|
|
|
|
list_for_each_entry(tcache, &target->caches, node) {
|
|
|
|
if (tcache->cache_attrs.address_mode !=
|
|
|
|
NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
res = &target->memregions;
|
|
|
|
if (!resource_contains(res, backing_res))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
*cache_size = tcache->cache_attrs.size;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
*cache_size = 0;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
EXPORT_SYMBOL_NS_GPL(hmat_get_extended_linear_cache_size, "CXL");
|
|
|
|
|
2023-12-21 15:03:07 -07:00
|
|
|
/*
 * Find the generic-port target whose ACPI _UID matches @uid.
 *
 * The _UID lives in bytes 8..11 of the SRAT device handle. Read it with
 * memcpy() rather than a *(u32 *) cast: the handle is a u8 array, so a
 * direct u32 load through the cast is a strict-aliasing violation and may
 * be misaligned on architectures that fault on unaligned access.
 *
 * Returns NULL when no target carries a matching handle. Caller is expected
 * to hold target_lock while the returned pointer is in use.
 */
static struct memory_target *acpi_find_genport_target(u32 uid)
{
	struct memory_target *target;
	u32 target_uid;

	list_for_each_entry(target, &targets, node) {
		memcpy(&target_uid, target->gen_port_device_handle + 8,
		       sizeof(target_uid));
		if (uid == target_uid)
			return target;
	}

	return NULL;
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* acpi_get_genport_coordinates - Retrieve the access coordinates for a generic port
|
|
|
|
* @uid: ACPI unique id
|
2024-03-08 14:59:23 -07:00
|
|
|
* @coord: The access coordinates written back out for the generic port.
|
|
|
|
* Expect 2 levels array.
|
2023-12-21 15:03:07 -07:00
|
|
|
*
|
|
|
|
* Return: 0 on success. Errno on failure.
|
|
|
|
*
|
|
|
|
* Only supports device handles that are ACPI. Assume ACPI0016 HID for CXL.
|
|
|
|
*/
|
|
|
|
int acpi_get_genport_coordinates(u32 uid,
|
|
|
|
struct access_coordinate *coord)
|
|
|
|
{
|
|
|
|
struct memory_target *target;
|
|
|
|
|
|
|
|
guard(mutex)(&target_lock);
|
|
|
|
target = acpi_find_genport_target(uid);
|
|
|
|
if (!target)
|
|
|
|
return -ENOENT;
|
|
|
|
|
2024-03-08 14:59:23 -07:00
|
|
|
coord[ACCESS_COORDINATE_LOCAL] =
|
|
|
|
target->coord[NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL];
|
|
|
|
coord[ACCESS_COORDINATE_CPU] =
|
|
|
|
target->coord[NODE_ACCESS_CLASS_GENPORT_SINK_CPU];
|
2023-12-21 15:03:07 -07:00
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
module: Convert symbol namespace to string literal
Clean up the existing export namespace code along the same lines of
commit 33def8498fdd ("treewide: Convert macro and uses of __section(foo)
to __section("foo")") and for the same reason, it is not desired for the
namespace argument to be a macro expansion itself.
Scripted using
git grep -l -e MODULE_IMPORT_NS -e EXPORT_SYMBOL_NS | while read file;
do
awk -i inplace '
/^#define EXPORT_SYMBOL_NS/ {
gsub(/__stringify\(ns\)/, "ns");
print;
next;
}
/^#define MODULE_IMPORT_NS/ {
gsub(/__stringify\(ns\)/, "ns");
print;
next;
}
/MODULE_IMPORT_NS/ {
$0 = gensub(/MODULE_IMPORT_NS\(([^)]*)\)/, "MODULE_IMPORT_NS(\"\\1\")", "g");
}
/EXPORT_SYMBOL_NS/ {
if ($0 ~ /(EXPORT_SYMBOL_NS[^(]*)\(([^,]+),/) {
if ($0 !~ /(EXPORT_SYMBOL_NS[^(]*)\(([^,]+), ([^)]+)\)/ &&
$0 !~ /(EXPORT_SYMBOL_NS[^(]*)\(\)/ &&
$0 !~ /^my/) {
getline line;
gsub(/[[:space:]]*\\$/, "");
gsub(/[[:space:]]/, "", line);
$0 = $0 " " line;
}
$0 = gensub(/(EXPORT_SYMBOL_NS[^(]*)\(([^,]+), ([^)]+)\)/,
"\\1(\\2, \"\\3\")", "g");
}
}
{ print }' $file;
done
Requested-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://mail.google.com/mail/u/2/#inbox/FMfcgzQXKWgMmjdFwwdsfgxzKpVHWPlc
Acked-by: Greg KH <gregkh@linuxfoundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2024-12-02 15:59:47 +01:00
|
|
|
EXPORT_SYMBOL_NS_GPL(acpi_get_genport_coordinates, "CXL");
|
2023-12-21 15:03:07 -07:00
|
|
|
|
2019-03-11 14:56:03 -06:00
|
|
|
static __init void alloc_memory_initiator(unsigned int cpu_pxm)
|
|
|
|
{
|
|
|
|
struct memory_initiator *initiator;
|
|
|
|
|
|
|
|
if (pxm_to_node(cpu_pxm) == NUMA_NO_NODE)
|
|
|
|
return;
|
|
|
|
|
|
|
|
initiator = find_mem_initiator(cpu_pxm);
|
|
|
|
if (initiator)
|
|
|
|
return;
|
|
|
|
|
|
|
|
initiator = kzalloc(sizeof(*initiator), GFP_KERNEL);
|
|
|
|
if (!initiator)
|
|
|
|
return;
|
|
|
|
|
|
|
|
initiator->processor_pxm = cpu_pxm;
|
2020-09-30 22:05:46 +08:00
|
|
|
initiator->has_cpu = node_state(pxm_to_node(cpu_pxm), N_CPU);
|
2019-03-11 14:56:03 -06:00
|
|
|
list_add_tail(&initiator->node, &initiators);
|
|
|
|
}
|
|
|
|
|
2023-12-21 15:02:49 -07:00
|
|
|
static __init struct memory_target *alloc_target(unsigned int mem_pxm)
|
2019-03-11 14:56:03 -06:00
|
|
|
{
|
|
|
|
struct memory_target *target;
|
|
|
|
|
|
|
|
target = find_mem_target(mem_pxm);
|
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
Memory that has been tagged EFI_MEMORY_SP, and has performance
properties described by the ACPI HMAT is expected to have an application
specific consumer.
Those consumers may want 100% of the memory capacity to be reserved from
any usage by the kernel. By default, with this enabling, a platform
device is created to represent this differentiated resource.
The device-dax "hmem" driver claims these devices by default and
provides an mmap interface for the target application. If the
administrator prefers, the hmem resource range can be made available to
the core-mm via the device-dax hotplug facility, kmem, to online the
memory with its own numa node.
This was tested with an emulated HMAT produced by qemu (with the pending
HMAT enabling patches), and "efi_fake_mem=8G@9G:0x40000" on the kernel
command line to mark the memory ranges associated with node2 and node3
as EFI_MEMORY_SP.
qemu numa configuration options:
-numa node,mem=4G,cpus=0-19,nodeid=0
-numa node,mem=4G,cpus=20-39,nodeid=1
-numa node,mem=4G,nodeid=2
-numa node,mem=4G,nodeid=3
-numa dist,src=0,dst=0,val=10
-numa dist,src=0,dst=1,val=21
-numa dist,src=0,dst=2,val=21
-numa dist,src=0,dst=3,val=21
-numa dist,src=1,dst=0,val=21
-numa dist,src=1,dst=1,val=10
-numa dist,src=1,dst=2,val=21
-numa dist,src=1,dst=3,val=21
-numa dist,src=2,dst=0,val=21
-numa dist,src=2,dst=1,val=21
-numa dist,src=2,dst=2,val=10
-numa dist,src=2,dst=3,val=21
-numa dist,src=3,dst=0,val=21
-numa dist,src=3,dst=1,val=21
-numa dist,src=3,dst=2,val=21
-numa dist,src=3,dst=3,val=10
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
Result:
[
{
"path":"\/platform\/hmem.1",
"id":1,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax1.0",
"size":"4.00 GiB (4.29 GB)"
}
]
},
{
"path":"\/platform\/hmem.0",
"id":0,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax0.0",
"size":"4.00 GiB (4.29 GB)"
}
]
}
]
[..]
240000000-43fffffff : Soft Reserved
240000000-33fffffff : hmem.0
240000000-33fffffff : dax0.0
340000000-43fffffff : hmem.1
340000000-43fffffff : dax1.0
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-11-06 17:43:55 -08:00
|
|
|
if (!target) {
|
|
|
|
target = kzalloc(sizeof(*target), GFP_KERNEL);
|
|
|
|
if (!target)
|
2023-12-21 15:02:49 -07:00
|
|
|
return NULL;
|
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
Memory that has been tagged EFI_MEMORY_SP, and has performance
properties described by the ACPI HMAT is expected to have an application
specific consumer.
Those consumers may want 100% of the memory capacity to be reserved from
any usage by the kernel. By default, with this enabling, a platform
device is created to represent this differentiated resource.
The device-dax "hmem" driver claims these devices by default and
provides an mmap interface for the target application. If the
administrator prefers, the hmem resource range can be made available to
the core-mm via the device-dax hotplug facility, kmem, to online the
memory with its own numa node.
This was tested with an emulated HMAT produced by qemu (with the pending
HMAT enabling patches), and "efi_fake_mem=8G@9G:0x40000" on the kernel
command line to mark the memory ranges associated with node2 and node3
as EFI_MEMORY_SP.
qemu numa configuration options:
-numa node,mem=4G,cpus=0-19,nodeid=0
-numa node,mem=4G,cpus=20-39,nodeid=1
-numa node,mem=4G,nodeid=2
-numa node,mem=4G,nodeid=3
-numa dist,src=0,dst=0,val=10
-numa dist,src=0,dst=1,val=21
-numa dist,src=0,dst=2,val=21
-numa dist,src=0,dst=3,val=21
-numa dist,src=1,dst=0,val=21
-numa dist,src=1,dst=1,val=10
-numa dist,src=1,dst=2,val=21
-numa dist,src=1,dst=3,val=21
-numa dist,src=2,dst=0,val=21
-numa dist,src=2,dst=1,val=21
-numa dist,src=2,dst=2,val=10
-numa dist,src=2,dst=3,val=21
-numa dist,src=3,dst=0,val=21
-numa dist,src=3,dst=1,val=21
-numa dist,src=3,dst=2,val=21
-numa dist,src=3,dst=3,val=10
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
Result:
[
{
"path":"\/platform\/hmem.1",
"id":1,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax1.0",
"size":"4.00 GiB (4.29 GB)"
}
]
},
{
"path":"\/platform\/hmem.0",
"id":0,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax0.0",
"size":"4.00 GiB (4.29 GB)"
}
]
}
]
[..]
240000000-43fffffff : Soft Reserved
240000000-33fffffff : hmem.0
240000000-33fffffff : dax0.0
340000000-43fffffff : hmem.1
340000000-43fffffff : dax1.0
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-11-06 17:43:55 -08:00
|
|
|
target->memory_pxm = mem_pxm;
|
|
|
|
target->processor_pxm = PXM_INVAL;
|
|
|
|
target->memregions = (struct resource) {
|
|
|
|
.name = "ACPI mem",
|
|
|
|
.start = 0,
|
|
|
|
.end = -1,
|
|
|
|
.flags = IORESOURCE_MEM,
|
|
|
|
};
|
|
|
|
list_add_tail(&target->node, &targets);
|
|
|
|
INIT_LIST_HEAD(&target->caches);
|
|
|
|
}
|
2019-03-11 14:56:03 -06:00
|
|
|
|
2023-12-21 15:02:49 -07:00
|
|
|
return target;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Allocate (or look up) the memory target for proximity domain @mem_pxm
 * and record the SRAT-provided address range [@start, @start + @len) in
 * the target's private resource tree.
 */
static __init void alloc_memory_target(unsigned int mem_pxm,
				       resource_size_t start,
				       resource_size_t len)
{
	struct memory_target *target;

	target = alloc_target(mem_pxm);
	if (!target)
		return;

	/*
	 * There are potentially multiple ranges per PXM, so record each
	 * in the per-target memregions resource tree.
	 */
	if (!__request_region(&target->memregions, start, len, "memory target",
				IORESOURCE_MEM))
		/* Best effort: the target itself stays registered */
		pr_warn("failed to reserve %#llx - %#llx in pxm: %d\n",
				start, start + len, mem_pxm);
}
|
|
|
|
|
2023-12-21 15:02:49 -07:00
|
|
|
/*
 * Allocate (or look up) the target for a Generic Port proximity domain
 * and stash the SRAT device handle so the port can later be matched to
 * its device.
 */
static __init void alloc_genport_target(unsigned int mem_pxm, u8 *handle)
{
	struct memory_target *target;

	target = alloc_target(mem_pxm);
	if (!target)
		return;

	/* Copy the fixed-size SRAT device handle into the target */
	memcpy(target->gen_port_device_handle, handle,
	       ACPI_SRAT_DEVICE_HANDLE_SIZE);
}
|
|
|
|
|
2019-03-11 14:55:59 -06:00
|
|
|
/* Human-readable name for an HMAT data-type field value. */
static __init const char *hmat_data_type(u8 type)
{
	switch (type) {
	case ACPI_HMAT_ACCESS_LATENCY:		return "Access Latency";
	case ACPI_HMAT_READ_LATENCY:		return "Read Latency";
	case ACPI_HMAT_WRITE_LATENCY:		return "Write Latency";
	case ACPI_HMAT_ACCESS_BANDWIDTH:	return "Access Bandwidth";
	case ACPI_HMAT_READ_BANDWIDTH:		return "Read Bandwidth";
	case ACPI_HMAT_WRITE_BANDWIDTH:		return "Write Bandwidth";
	default:				return "Reserved";
	}
}
|
|
|
|
|
|
|
|
/*
 * Unit suffix matching hmat_data_type(): latencies are reported in
 * nanoseconds, bandwidths in MB/s; unknown types get no suffix.
 */
static __init const char *hmat_data_type_suffix(u8 type)
{
	switch (type) {
	case ACPI_HMAT_ACCESS_LATENCY:
	case ACPI_HMAT_READ_LATENCY:
	case ACPI_HMAT_WRITE_LATENCY:
		return " nsec";
	case ACPI_HMAT_ACCESS_BANDWIDTH:
	case ACPI_HMAT_READ_BANDWIDTH:
	case ACPI_HMAT_WRITE_BANDWIDTH:
		return " MB/s";
	default:
		return "";
	}
}
|
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
/*
 * Normalize a raw HMAT matrix entry into nanoseconds (latency) or MB/s
 * (bandwidth), scaling by the table's entry base unit.
 *
 * Returns 0 for entries that are invalid (0 or 0xffff per the spec) or
 * whose scaled value would overflow a u32.
 */
static u32 hmat_normalize(u16 entry, u64 base, u8 type)
{
	u32 value;

	/*
	 * Check for invalid and overflow values
	 */
	if (entry == 0xffff || !entry)
		return 0;
	else if (base > (UINT_MAX / (entry)))
		return 0;

	/*
	 * Divide by the base unit for version 1, convert latency from
	 * picoseconds to nanoseconds if revision 2.
	 */
	value = entry * base;
	if (hmat_revision == 1) {
		/* Sub-unit values round to zero and are treated as invalid */
		if (value < 10)
			return 0;
		value = DIV_ROUND_UP(value, 10);
	} else if (hmat_revision == 2) {
		switch (type) {
		case ACPI_HMAT_ACCESS_LATENCY:
		case ACPI_HMAT_READ_LATENCY:
		case ACPI_HMAT_WRITE_LATENCY:
			value = DIV_ROUND_UP(value, 1000);
			break;
		default:
			/* Bandwidth types are already in MB/s */
			break;
		}
	}
	return value;
}
|
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
static void hmat_update_target_access(struct memory_target *target,
|
2020-09-30 22:05:46 +08:00
|
|
|
u8 type, u32 value, int access)
|
2019-03-11 14:56:03 -06:00
|
|
|
{
|
|
|
|
switch (type) {
|
|
|
|
case ACPI_HMAT_ACCESS_LATENCY:
|
2023-12-21 15:02:37 -07:00
|
|
|
target->coord[access].read_latency = value;
|
|
|
|
target->coord[access].write_latency = value;
|
2019-03-11 14:56:03 -06:00
|
|
|
break;
|
|
|
|
case ACPI_HMAT_READ_LATENCY:
|
2023-12-21 15:02:37 -07:00
|
|
|
target->coord[access].read_latency = value;
|
2019-03-11 14:56:03 -06:00
|
|
|
break;
|
|
|
|
case ACPI_HMAT_WRITE_LATENCY:
|
2023-12-21 15:02:37 -07:00
|
|
|
target->coord[access].write_latency = value;
|
2019-03-11 14:56:03 -06:00
|
|
|
break;
|
|
|
|
case ACPI_HMAT_ACCESS_BANDWIDTH:
|
2023-12-21 15:02:37 -07:00
|
|
|
target->coord[access].read_bandwidth = value;
|
|
|
|
target->coord[access].write_bandwidth = value;
|
2019-03-11 14:56:03 -06:00
|
|
|
break;
|
|
|
|
case ACPI_HMAT_READ_BANDWIDTH:
|
2023-12-21 15:02:37 -07:00
|
|
|
target->coord[access].read_bandwidth = value;
|
2019-03-11 14:56:03 -06:00
|
|
|
break;
|
|
|
|
case ACPI_HMAT_WRITE_BANDWIDTH:
|
2023-12-21 15:02:37 -07:00
|
|
|
target->coord[access].write_bandwidth = value;
|
2019-03-11 14:56:03 -06:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-08 14:59:30 -07:00
|
|
|
/*
 * Allow an external agent to override the HMAT-derived performance data
 * for the memory target backing node @nid with @coord for the given
 * access class.
 *
 * Return: 0 on success, -EINVAL for an invalid node, -ENODEV when no
 * memory target exists for the node's proximity domain.
 */
int hmat_update_target_coordinates(int nid, struct access_coordinate *coord,
				   enum access_coordinate_class access)
{
	struct memory_target *target;
	int pxm;

	if (nid == NUMA_NO_NODE)
		return -EINVAL;

	pxm = node_to_pxm(nid);
	/* Lock is dropped automatically on every return path below */
	guard(mutex)(&target_lock);
	target = find_mem_target(pxm);
	if (!target)
		return -ENODEV;

	/* Mirror all four values into the target's coordinate slot */
	hmat_update_target_access(target, ACPI_HMAT_READ_LATENCY,
				  coord->read_latency, access);
	hmat_update_target_access(target, ACPI_HMAT_WRITE_LATENCY,
				  coord->write_latency, access);
	hmat_update_target_access(target, ACPI_HMAT_READ_BANDWIDTH,
				  coord->read_bandwidth, access);
	hmat_update_target_access(target, ACPI_HMAT_WRITE_BANDWIDTH,
				  coord->write_bandwidth, access);
	/* Prevent later HMAT-derived updates from clobbering these values */
	target->ext_updated = true;

	return 0;
}
EXPORT_SYMBOL_GPL(hmat_update_target_coordinates);
|
|
|
|
|
2019-03-11 14:56:03 -06:00
|
|
|
/*
 * Record a parsed locality structure and index it by the performance
 * metric(s) it describes so later best-initiator searches can find it.
 * The combined "access" types cover both the read and write slots.
 */
static __init void hmat_add_locality(struct acpi_hmat_locality *hmat_loc)
{
	struct memory_locality *loc;

	loc = kzalloc(sizeof(*loc), GFP_KERNEL);
	if (!loc) {
		pr_notice_once("Failed to allocate HMAT locality\n");
		return;
	}

	loc->hmat_loc = hmat_loc;
	list_add_tail(&loc->node, &localities);

	switch (hmat_loc->data_type) {
	case ACPI_HMAT_ACCESS_LATENCY:
		localities_types[READ_LATENCY] = loc;
		localities_types[WRITE_LATENCY] = loc;
		break;
	case ACPI_HMAT_READ_LATENCY:
		localities_types[READ_LATENCY] = loc;
		break;
	case ACPI_HMAT_WRITE_LATENCY:
		localities_types[WRITE_LATENCY] = loc;
		break;
	case ACPI_HMAT_ACCESS_BANDWIDTH:
		localities_types[READ_BANDWIDTH] = loc;
		localities_types[WRITE_BANDWIDTH] = loc;
		break;
	case ACPI_HMAT_READ_BANDWIDTH:
		localities_types[READ_BANDWIDTH] = loc;
		break;
	case ACPI_HMAT_WRITE_BANDWIDTH:
		localities_types[WRITE_BANDWIDTH] = loc;
		break;
	default:
		break;
	}
}
|
|
|
|
|
2023-12-21 15:02:55 -07:00
|
|
|
/*
 * Apply a normalized locality value to the target @tgt_pxm when the
 * entry's initiator @init_pxm is the target's local processor domain.
 * Only memory-hierarchy entries are considered.
 */
static __init void hmat_update_target(unsigned int tgt_pxm, unsigned int init_pxm,
				      u8 mem_hier, u8 type, u32 value)
{
	struct memory_target *target = find_mem_target(tgt_pxm);

	if (mem_hier != ACPI_HMAT_MEMORY)
		return;

	if (!target || target->processor_pxm != init_pxm)
		return;

	hmat_update_target_access(target, type, value,
				  ACCESS_COORDINATE_LOCAL);
	/* If the node has a CPU, update access ACCESS_COORDINATE_CPU */
	if (node_state(pxm_to_node(init_pxm), N_CPU))
		hmat_update_target_access(target, type, value,
					  ACCESS_COORDINATE_CPU);
}
|
|
|
|
|
2019-03-11 14:55:59 -06:00
|
|
|
/*
 * Parse one HMAT System Locality Latency and Bandwidth Information
 * structure: validate its length against the advertised initiator and
 * target domain counts, normalize every matrix entry, and push the
 * values to the matching memory targets.
 */
static __init int hmat_parse_locality(union acpi_subtable_headers *header,
				      const unsigned long end)
{
	struct acpi_hmat_locality *hmat_loc = (void *)header;
	unsigned int init, targ, total_size, ipds, tpds;
	u32 *inits, *targs, value;
	u16 *entries;
	u8 type, mem_hier;

	if (hmat_loc->header.length < sizeof(*hmat_loc)) {
		pr_notice("Unexpected locality header length: %u\n",
			 hmat_loc->header.length);
		return -EINVAL;
	}

	type = hmat_loc->data_type;
	mem_hier = hmat_loc->flags & ACPI_HMAT_MEMORY_HIERARCHY;
	ipds = hmat_loc->number_of_initiator_Pds;
	tpds = hmat_loc->number_of_target_Pds;
	/*
	 * The fixed header is followed by ipds initiator PXMs (u32),
	 * tpds target PXMs (u32), then an ipds x tpds matrix of u16
	 * entries -- verify the subtable is big enough for all of it.
	 */
	total_size = sizeof(*hmat_loc) + sizeof(*entries) * ipds * tpds +
		     sizeof(*inits) * ipds + sizeof(*targs) * tpds;
	if (hmat_loc->header.length < total_size) {
		pr_notice("Unexpected locality header length:%u, minimum required:%u\n",
			 hmat_loc->header.length, total_size);
		return -EINVAL;
	}

	pr_debug("Locality: Flags:%02x Type:%s Initiator Domains:%u Target Domains:%u Base:%lld\n",
		 hmat_loc->flags, hmat_data_type(type), ipds, tpds,
		 hmat_loc->entry_base_unit);

	inits = (u32 *)(hmat_loc + 1);
	targs = inits + ipds;
	entries = (u16 *)(targs + tpds);
	for (init = 0; init < ipds; init++) {
		alloc_memory_initiator(inits[init]);
		for (targ = 0; targ < tpds; targ++) {
			/* Row-major matrix: entry for (init, targ) pair */
			value = hmat_normalize(entries[init * tpds + targ],
					       hmat_loc->entry_base_unit,
					       type);
			pr_debug("  Initiator-Target[%u-%u]:%u%s\n",
				 inits[init], targs[targ], value,
				 hmat_data_type_suffix(type));

			hmat_update_target(targs[targ], inits[init],
					   mem_hier, type, value);
		}
	}

	/* Only memory-hierarchy localities feed best-initiator selection */
	if (mem_hier == ACPI_HMAT_MEMORY)
		hmat_add_locality(hmat_loc);

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Parse one HMAT Memory Side Cache Information structure and attach the
 * decoded cache attributes to the corresponding memory target.
 *
 * Parse errors for an individual cache entry are not fatal to overall
 * HMAT parsing (returns 0), except for a malformed header.
 */
static __init int hmat_parse_cache(union acpi_subtable_headers *header,
				   const unsigned long end)
{
	struct acpi_hmat_cache *cache = (void *)header;
	struct memory_target *target;
	struct target_cache *tcache;
	u32 attrs;

	if (cache->header.length < sizeof(*cache)) {
		pr_notice("Unexpected cache header length: %u\n",
			 cache->header.length);
		return -EINVAL;
	}

	attrs = cache->cache_attributes;
	pr_debug("Cache: Domain:%u Size:%llu Attrs:%08x SMBIOS Handles:%d\n",
		 cache->memory_PD, cache->cache_size, attrs,
		 cache->number_of_SMBIOShandles);

	/* Nothing to attach to if the domain never showed up in SRAT */
	target = find_mem_target(cache->memory_PD);
	if (!target)
		return 0;

	tcache = kzalloc(sizeof(*tcache), GFP_KERNEL);
	if (!tcache) {
		pr_notice_once("Failed to allocate HMAT cache info\n");
		return 0;
	}

	/* Level and line size are bit-fields within cache_attributes */
	tcache->cache_attrs.size = cache->cache_size;
	tcache->cache_attrs.level = (attrs & ACPI_HMAT_CACHE_LEVEL) >> 4;
	tcache->cache_attrs.line_size = (attrs & ACPI_HMAT_CACHE_LINE_SIZE) >> 16;

	switch ((attrs & ACPI_HMAT_CACHE_ASSOCIATIVITY) >> 8) {
	case ACPI_HMAT_CA_DIRECT_MAPPED:
		tcache->cache_attrs.indexing = NODE_CACHE_DIRECT_MAP;
		/* Extended Linear mode is only valid if cache is direct mapped */
		if (cache->address_mode == ACPI_HMAT_CACHE_MODE_EXTENDED_LINEAR) {
			tcache->cache_attrs.address_mode =
				NODE_CACHE_ADDR_MODE_EXTENDED_LINEAR;
		}
		break;
	case ACPI_HMAT_CA_COMPLEX_CACHE_INDEXING:
		tcache->cache_attrs.indexing = NODE_CACHE_INDEXED;
		break;
	case ACPI_HMAT_CA_NONE:
	default:
		tcache->cache_attrs.indexing = NODE_CACHE_OTHER;
		break;
	}

	switch ((attrs & ACPI_HMAT_WRITE_POLICY) >> 12) {
	case ACPI_HMAT_CP_WB:
		tcache->cache_attrs.write_policy = NODE_CACHE_WRITE_BACK;
		break;
	case ACPI_HMAT_CP_WT:
		tcache->cache_attrs.write_policy = NODE_CACHE_WRITE_THROUGH;
		break;
	case ACPI_HMAT_CP_NONE:
	default:
		tcache->cache_attrs.write_policy = NODE_CACHE_WRITE_OTHER;
		break;
	}
	list_add_tail(&tcache->node, &target->caches);

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Parse one HMAT Memory Proximity Domain Attributes structure and, when
 * the entry declares a valid attached processor domain, record it as the
 * target's local initiator.
 */
static int __init hmat_parse_proximity_domain(union acpi_subtable_headers *header,
					      const unsigned long end)
{
	struct acpi_hmat_proximity_domain *p = (void *)header;
	struct memory_target *target = NULL;

	if (p->header.length != sizeof(*p)) {
		pr_notice("Unexpected address range header length: %u\n",
			 p->header.length);
		return -EINVAL;
	}

	/* Revision 1 carried the physical range in now-reserved fields */
	if (hmat_revision == 1)
		pr_debug("Memory (%#llx length %#llx) Flags:%04x Processor Domain:%u Memory Domain:%u\n",
			 p->reserved3, p->reserved4, p->flags, p->processor_PD,
			 p->memory_PD);
	else
		pr_info("Memory Flags:%04x Processor Domain:%u Memory Domain:%u\n",
			p->flags, p->processor_PD, p->memory_PD);

	/* The memory-PD-valid flag only exists in revision 1 */
	if ((hmat_revision == 1 && p->flags & ACPI_HMAT_MEMORY_PD_VALID) ||
	    hmat_revision > 1) {
		target = find_mem_target(p->memory_PD);
		if (!target) {
			pr_debug("Memory Domain missing from SRAT\n");
			return -EINVAL;
		}
	}
	if (target && p->flags & ACPI_HMAT_PROCESSOR_PD_VALID) {
		int p_node = pxm_to_node(p->processor_PD);

		if (p_node == NUMA_NO_NODE) {
			pr_debug("Invalid Processor Domain\n");
			return -EINVAL;
		}
		target->processor_pxm = p->processor_PD;
	}

	return 0;
}
|
|
|
|
|
|
|
|
/*
 * Dispatch one HMAT subtable to its type-specific parser; unknown types
 * are rejected with -EINVAL.
 */
static int __init hmat_parse_subtable(union acpi_subtable_headers *header,
				      const unsigned long end)
{
	struct acpi_hmat_structure *hdr = (void *)header;

	if (!hdr)
		return -EINVAL;

	switch (hdr->type) {
	case ACPI_HMAT_TYPE_PROXIMITY:
		return hmat_parse_proximity_domain(header, end);
	case ACPI_HMAT_TYPE_LOCALITY:
		return hmat_parse_locality(header, end);
	case ACPI_HMAT_TYPE_CACHE:
		return hmat_parse_cache(header, end);
	default:
		return -EINVAL;
	}
}
|
|
|
|
|
2019-03-11 14:56:03 -06:00
|
|
|
/*
 * SRAT memory affinity callback: register each enabled memory range as a
 * memory target keyed by its proximity domain.
 */
static __init int srat_parse_mem_affinity(union acpi_subtable_headers *header,
					  const unsigned long end)
{
	struct acpi_srat_mem_affinity *ma = (void *)header;

	if (!ma)
		return -EINVAL;
	/* Disabled entries are silently skipped */
	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
		return 0;
	alloc_memory_target(ma->proximity_domain, ma->base_address, ma->length);
	return 0;
}
|
|
|
|
|
2023-12-21 15:02:49 -07:00
|
|
|
/*
 * SRAT generic port affinity callback: register enabled, non-PCI generic
 * port entries as targets keyed by proximity domain, recording the
 * device handle for later matching.
 */
static __init int srat_parse_genport_affinity(union acpi_subtable_headers *header,
					      const unsigned long end)
{
	struct acpi_srat_generic_affinity *ga = (void *)header;

	if (!ga)
		return -EINVAL;

	if (!(ga->flags & ACPI_SRAT_GENERIC_AFFINITY_ENABLED))
		return 0;

	/* Skip PCI device_handle for now */
	if (ga->device_handle_type != 0)
		return 0;

	alloc_genport_target(ga->proximity_domain,
			     (u8 *)ga->device_handle);

	return 0;
}
|
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
/*
 * Look up the normalized performance value for the (initiator, target)
 * pair in the given locality's matrix.  Returns 0 when either domain is
 * absent from the locality's PXM lists.
 */
static u32 hmat_initiator_perf(struct memory_target *target,
			       struct memory_initiator *initiator,
			       struct acpi_hmat_locality *hmat_loc)
{
	unsigned int ipds, tpds, i, idx = 0, tdx = 0;
	u32 *inits, *targs;
	u16 *entries;

	/* Matrix layout after the header: u32 inits[], u32 targs[], u16 entries[] */
	ipds = hmat_loc->number_of_initiator_Pds;
	tpds = hmat_loc->number_of_target_Pds;
	inits = (u32 *)(hmat_loc + 1);
	targs = inits + ipds;
	entries = (u16 *)(targs + tpds);

	/* Find the row index for this initiator's PXM */
	for (i = 0; i < ipds; i++) {
		if (inits[i] == initiator->processor_pxm) {
			idx = i;
			break;
		}
	}

	if (i == ipds)
		return 0;

	/* Find the column index for this target's PXM */
	for (i = 0; i < tpds; i++) {
		if (targs[i] == target->memory_pxm) {
			tdx = i;
			break;
		}
	}
	if (i == tpds)
		return 0;

	return hmat_normalize(entries[idx * tpds + tdx],
			      hmat_loc->entry_base_unit,
			      hmat_loc->data_type);
}
|
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
/*
 * Fold @value into the running best for its data type: lower wins for
 * latencies, higher wins for bandwidths.  A zero @value (invalid entry)
 * or an unrecognized type never updates.
 *
 * Returns true when *best was replaced.
 */
static bool hmat_update_best(u8 type, u32 value, u32 *best)
{
	bool lower_wins;

	if (!value)
		return false;

	switch (type) {
	case ACPI_HMAT_ACCESS_LATENCY:
	case ACPI_HMAT_READ_LATENCY:
	case ACPI_HMAT_WRITE_LATENCY:
		lower_wins = true;
		break;
	case ACPI_HMAT_ACCESS_BANDWIDTH:
	case ACPI_HMAT_READ_BANDWIDTH:
	case ACPI_HMAT_WRITE_BANDWIDTH:
		lower_wins = false;
		break;
	default:
		return false;
	}

	/* An unset (zero) best is always replaced */
	if (*best && (lower_wins ? *best <= value : *best >= value))
		return false;

	*best = value;
	return true;
}
|
|
|
|
|
2021-04-08 11:28:34 -07:00
|
|
|
static int initiator_cmp(void *priv, const struct list_head *a,
|
|
|
|
const struct list_head *b)
|
2019-03-11 14:56:03 -06:00
|
|
|
{
|
|
|
|
struct memory_initiator *ia;
|
|
|
|
struct memory_initiator *ib;
|
|
|
|
|
|
|
|
ia = list_entry(a, struct memory_initiator, node);
|
|
|
|
ib = list_entry(b, struct memory_initiator, node);
|
|
|
|
|
|
|
|
return ia->processor_pxm - ib->processor_pxm;
|
|
|
|
}
|
|
|
|
|
2022-11-16 16:37:37 -07:00
|
|
|
static int initiators_to_nodemask(unsigned long *p_nodes)
|
|
|
|
{
|
|
|
|
struct memory_initiator *initiator;
|
|
|
|
|
|
|
|
if (list_empty(&initiators))
|
|
|
|
return -ENXIO;
|
|
|
|
|
|
|
|
list_for_each_entry(initiator, &initiators, node)
|
|
|
|
set_bit(initiator->processor_pxm, p_nodes);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2023-09-26 14:06:26 +08:00
|
|
|
/*
 * Compute the set of best-performing initiator nodes for @target (into
 * @p_nodes) and record the winning performance values for the given
 * access class.
 */
static void hmat_update_target_attrs(struct memory_target *target,
				     unsigned long *p_nodes, int access)
{
	struct memory_initiator *initiator;
	unsigned int cpu_nid;
	struct memory_locality *loc = NULL;
	u32 best = 0;
	int i;

	/* Don't update if an external agent has changed the data. */
	if (target->ext_updated)
		return;

	/* Don't update for generic port if there's no device handle */
	if ((access == NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL ||
	     access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) &&
	    !(*(u16 *)target->gen_port_device_handle))
		return;

	bitmap_zero(p_nodes, MAX_NUMNODES);
	/*
	 * If the Address Range Structure provides a local processor pxm, set
	 * only that one. Otherwise, find the best performance attributes and
	 * collect all initiators that match.
	 */
	if (target->processor_pxm != PXM_INVAL) {
		cpu_nid = pxm_to_node(target->processor_pxm);
		if (access == ACCESS_COORDINATE_LOCAL ||
		    node_state(cpu_nid, N_CPU)) {
			set_bit(target->processor_pxm, p_nodes);
			return;
		}
	}

	if (list_empty(&localities))
		return;

	/*
	 * We need the initiator list sorted so we can use bitmap_clear for
	 * previously set initiators when we find a better memory accessor.
	 * We'll also use the sorting to prime the candidate nodes with known
	 * initiators.
	 */
	list_sort(NULL, &initiators, initiator_cmp);
	if (initiators_to_nodemask(p_nodes) < 0)
		return;

	/* Evaluate each indexed metric independently */
	for (i = WRITE_LATENCY; i <= READ_BANDWIDTH; i++) {
		loc = localities_types[i];
		if (!loc)
			continue;

		best = 0;
		list_for_each_entry(initiator, &initiators, node) {
			u32 value;

			/* CPU-class access only considers CPU-bearing initiators */
			if ((access == ACCESS_COORDINATE_CPU ||
			     access == NODE_ACCESS_CLASS_GENPORT_SINK_CPU) &&
			    !initiator->has_cpu) {
				clear_bit(initiator->processor_pxm, p_nodes);
				continue;
			}
			if (!test_bit(initiator->processor_pxm, p_nodes))
				continue;

			value = hmat_initiator_perf(target, initiator, loc->hmat_loc);
			/*
			 * A new best invalidates every lower-PXM candidate
			 * (the list is sorted, so they all precede this one).
			 */
			if (hmat_update_best(loc->hmat_loc->data_type, value, &best))
				bitmap_clear(p_nodes, 0, initiator->processor_pxm);
			if (value != best)
				clear_bit(initiator->processor_pxm, p_nodes);
		}
		if (best)
			hmat_update_target_access(target, loc->hmat_loc->data_type, best, access);
	}
}
|
|
|
|
|
|
|
|
static void __hmat_register_target_initiators(struct memory_target *target,
|
|
|
|
unsigned long *p_nodes,
|
|
|
|
int access)
|
|
|
|
{
|
|
|
|
unsigned int mem_nid, cpu_nid;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
mem_nid = pxm_to_node(target->memory_pxm);
|
|
|
|
hmat_update_target_attrs(target, p_nodes, access);
|
2019-03-11 14:56:03 -06:00
|
|
|
for_each_set_bit(i, p_nodes, MAX_NUMNODES) {
|
|
|
|
cpu_nid = pxm_to_node(i);
|
2023-09-26 14:06:26 +08:00
|
|
|
register_memory_node_under_compute_node(mem_nid, cpu_nid, access);
|
2019-03-11 14:56:03 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-03-08 14:59:20 -07:00
|
|
|
/*
 * Refresh both generic-port access classes for @target.  The bitmap is
 * static scratch space for hmat_update_target_attrs().
 */
static void hmat_update_generic_target(struct memory_target *target)
{
	static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);

	hmat_update_target_attrs(target, p_nodes,
				 NODE_ACCESS_CLASS_GENPORT_SINK_LOCAL);
	hmat_update_target_attrs(target, p_nodes,
				 NODE_ACCESS_CLASS_GENPORT_SINK_CPU);
}
|
|
|
|
|
2023-09-26 14:06:26 +08:00
|
|
|
/*
 * Register @target under its best initiators for both the local and CPU
 * access classes.  The bitmap is static scratch space reused across
 * calls.
 */
static void hmat_register_target_initiators(struct memory_target *target)
{
	static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);

	__hmat_register_target_initiators(target, p_nodes,
					  ACCESS_COORDINATE_LOCAL);
	__hmat_register_target_initiators(target, p_nodes,
					  ACCESS_COORDINATE_CPU);
}
|
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
static void hmat_register_target_cache(struct memory_target *target)
|
2019-08-05 08:27:04 -06:00
|
|
|
{
|
|
|
|
unsigned mem_nid = pxm_to_node(target->memory_pxm);
|
|
|
|
struct target_cache *tcache;
|
|
|
|
|
|
|
|
list_for_each_entry(tcache, &target->caches, node)
|
|
|
|
node_add_cache(mem_nid, &tcache->cache_attrs);
|
|
|
|
}
|
|
|
|
|
2020-09-30 22:05:46 +08:00
|
|
|
static void hmat_register_target_perf(struct memory_target *target, int access)
|
2019-03-11 14:56:04 -06:00
|
|
|
{
|
|
|
|
unsigned mem_nid = pxm_to_node(target->memory_pxm);
|
2023-12-21 15:02:37 -07:00
|
|
|
node_set_perf_attrs(mem_nid, &target->coord[access], access);
|
2019-03-11 14:56:04 -06:00
|
|
|
}
|
|
|
|
|
2019-11-11 16:34:26 -05:00
|
|
|
static void hmat_register_target_devices(struct memory_target *target)
|
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
Memory that has been tagged EFI_MEMORY_SP, and has performance
properties described by the ACPI HMAT is expected to have an application
specific consumer.
Those consumers may want 100% of the memory capacity to be reserved from
any usage by the kernel. By default, with this enabling, a platform
device is created to represent this differentiated resource.
The device-dax "hmem" driver claims these devices by default and
provides an mmap interface for the target application. If the
administrator prefers, the hmem resource range can be made available to
the core-mm via the device-dax hotplug facility, kmem, to online the
memory with its own numa node.
This was tested with an emulated HMAT produced by qemu (with the pending
HMAT enabling patches), and "efi_fake_mem=8G@9G:0x40000" on the kernel
command line to mark the memory ranges associated with node2 and node3
as EFI_MEMORY_SP.
qemu numa configuration options:
-numa node,mem=4G,cpus=0-19,nodeid=0
-numa node,mem=4G,cpus=20-39,nodeid=1
-numa node,mem=4G,nodeid=2
-numa node,mem=4G,nodeid=3
-numa dist,src=0,dst=0,val=10
-numa dist,src=0,dst=1,val=21
-numa dist,src=0,dst=2,val=21
-numa dist,src=0,dst=3,val=21
-numa dist,src=1,dst=0,val=21
-numa dist,src=1,dst=1,val=10
-numa dist,src=1,dst=2,val=21
-numa dist,src=1,dst=3,val=21
-numa dist,src=2,dst=0,val=21
-numa dist,src=2,dst=1,val=21
-numa dist,src=2,dst=2,val=10
-numa dist,src=2,dst=3,val=21
-numa dist,src=3,dst=0,val=21
-numa dist,src=3,dst=1,val=21
-numa dist,src=3,dst=2,val=21
-numa dist,src=3,dst=3,val=10
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
Result:
[
{
"path":"\/platform\/hmem.1",
"id":1,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax1.0",
"size":"4.00 GiB (4.29 GB)"
}
]
},
{
"path":"\/platform\/hmem.0",
"id":0,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax0.0",
"size":"4.00 GiB (4.29 GB)"
}
]
}
]
[..]
240000000-43fffffff : Soft Reserved
240000000-33fffffff : hmem.0
240000000-33fffffff : dax0.0
340000000-43fffffff : hmem.1
340000000-43fffffff : dax1.0
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-11-06 17:43:55 -08:00
|
|
|
{
|
|
|
|
struct resource *res;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Do not bother creating devices if no driver is available to
|
|
|
|
* consume them.
|
|
|
|
*/
|
|
|
|
if (!IS_ENABLED(CONFIG_DEV_DAX_HMEM))
|
|
|
|
return;
|
|
|
|
|
2020-10-13 16:49:13 -07:00
|
|
|
for (res = target->memregions.child; res; res = res->sibling) {
|
ACPI updates for 5.10-rc1
- Add support for generic initiator-only proximity domains to
the ACPI NUMA code and the architectures using it (Jonathan
Cameron).
- Clean up some non-ACPICA code referring to debug facilities from
ACPICA that are not actually used in there (Hanjun Guo).
- Add new DPTF driver for the PCH FIVR participant (Srinivas
Pandruvada).
- Reduce overhead related to accessing GPE registers in ACPICA and
the OS interface layer and make it possible to access GPE registers
using logical addresses if they are memory-mapped (Rafael Wysocki).
- Update the ACPICA code in the kernel to upstream revision 20200925
including changes as follows:
* Add predefined names from the SMBus sepcification (Bob Moore).
* Update acpi_help UUID list (Bob Moore).
* Return exceptions for string-to-integer conversions in iASL (Bob
Moore).
* Add a new "ALL <NameSeg>" debugger command (Bob Moore).
* Add support for 64 bit risc-v compilation (Colin Ian King).
* Do assorted cleanups (Bob Moore, Colin Ian King, Randy Dunlap).
- Add new ACPI backlight whitelist entry for HP 635 Notebook (Alex
Hung).
- Move TPS68470 OpRegion driver to drivers/acpi/pmic/ and split out
Kconfig and Makefile specific for ACPI PMIC (Andy Shevchenko).
- Clean up the ACPI SoC driver for AMD SoCs (Hanjun Guo).
- Add missing config_item_put() to fix refcount leak (Hanjun Guo).
- Drop lefrover field from struct acpi_memory_device (Hanjun Guo).
- Make the ACPI extlog driver check for RDMSR failures (Ben
Hutchings).
- Fix handling of lid state changes in the ACPI button driver when
input device is closed (Dmitry Torokhov).
- Fix several assorted build issues (Barnabás Pőcze, John Garry,
Nathan Chancellor, Tian Tao).
- Drop unused inline functions and reduce code duplication by using
kobj_to_dev() in the NFIT parsing code (YueHaibing, Wang Qing).
- Serialize tools/power/acpi Makefile (Thomas Renninger).
-----BEGIN PGP SIGNATURE-----
iQJGBAABCAAwFiEE4fcc61cGeeHD/fCwgsRv/nhiVHEFAl+F4IkSHHJqd0Byand5
c29ja2kubmV0AAoJEILEb/54YlRx1gIQAIZrt09fquEIZhYulGZAkuYhSX2U/DZt
poow5+TiGk36JNHlbZS19kZ3F0tJ1wA6CKSfF/bYyULxL+gYaUjdLXzv2kArTSAj
nzDXQ2CystpySZI/sEkl4QjsMg0xuZlBhlnCfNHzJw049TgdsJHnxMkJXb8T90A+
l2JKm2OpBkNvQGNpwd3djLg8xSDnHUmuevsWZPHDp92/fLMF9DUBk8dVuEwa0ndF
hAUpWm+EL1tJQnhNwtfV/Akd9Ypqgk/7ROFWFHGDtHMZGnBjpyXZw68vHMX7SL6N
Ej90GWGPHSJs/7Fsg4Hiaxxcph9WFNLPcpck5lVAMIrNHMKANjqQzCsmHavV/WTG
STC9/qwJauA1EOjovlmlCFHctjKE/ya6Hm299WTlfBqB+Lu1L3oMR2CC+Uj0YfyG
sv3264rJCsaSw610iwQOG807qHENopASO2q5DuKG0E9JpcaBUwn1N4qP5svvQciq
4aA8Ma6xM/QHCO4CS0Se9C0+WSVtxWwOUichRqQmU4E6u1sXvKJxTeWo79rV7PAh
L6BwoOxBLabEiyzpi6HPGs6DoKj/N6tOQenBh4ibdwpAwMtq7hIlBFa0bp19c2wT
vx8F2Raa8vbQ2zZ1QEiPZnPLJUoy2DgaCtKJ6E0FTDXNs3VFlWgyhIUlIRqk5BS9
OnAwVAUrTMkJ
=feLU
-----END PGP SIGNATURE-----
Merge tag 'acpi-5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull ACPI updates from Rafael Wysocki:
"These add support for generic initiator-only proximity domains to the
ACPI NUMA code and the architectures using it, clean up some
non-ACPICA code referring to debug facilities from ACPICA, reduce the
overhead related to accessing GPE registers, add a new DPTF (Dynamic
Power and Thermal Framework) participant driver, update the ACPICA
code in the kernel to upstream revision 20200925, add a new ACPI
backlight whitelist entry, fix a few assorted issues and clean up some
code.
Specifics:
- Add support for generic initiator-only proximity domains to the
ACPI NUMA code and the architectures using it (Jonathan Cameron)
- Clean up some non-ACPICA code referring to debug facilities from
ACPICA that are not actually used in there (Hanjun Guo)
- Add new DPTF driver for the PCH FIVR participant (Srinivas
Pandruvada)
- Reduce overhead related to accessing GPE registers in ACPICA and
the OS interface layer and make it possible to access GPE registers
using logical addresses if they are memory-mapped (Rafael Wysocki)
- Update the ACPICA code in the kernel to upstream revision 20200925
including changes as follows:
+ Add predefined names from the SMBus sepcification (Bob Moore)
+ Update acpi_help UUID list (Bob Moore)
+ Return exceptions for string-to-integer conversions in iASL (Bob
Moore)
+ Add a new "ALL <NameSeg>" debugger command (Bob Moore)
+ Add support for 64 bit risc-v compilation (Colin Ian King)
+ Do assorted cleanups (Bob Moore, Colin Ian King, Randy Dunlap)
- Add new ACPI backlight whitelist entry for HP 635 Notebook (Alex
Hung)
- Move TPS68470 OpRegion driver to drivers/acpi/pmic/ and split out
Kconfig and Makefile specific for ACPI PMIC (Andy Shevchenko)
- Clean up the ACPI SoC driver for AMD SoCs (Hanjun Guo)
- Add missing config_item_put() to fix refcount leak (Hanjun Guo)
- Drop lefrover field from struct acpi_memory_device (Hanjun Guo)
- Make the ACPI extlog driver check for RDMSR failures (Ben
Hutchings)
- Fix handling of lid state changes in the ACPI button driver when
input device is closed (Dmitry Torokhov)
- Fix several assorted build issues (Barnabás Pőcze, John Garry,
Nathan Chancellor, Tian Tao)
- Drop unused inline functions and reduce code duplication by using
kobj_to_dev() in the NFIT parsing code (YueHaibing, Wang Qing)
- Serialize tools/power/acpi Makefile (Thomas Renninger)"
* tag 'acpi-5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (64 commits)
ACPICA: Update version to 20200925 Version 20200925
ACPICA: Remove unnecessary semicolon
ACPICA: Debugger: Add a new command: "ALL <NameSeg>"
ACPICA: iASL: Return exceptions for string-to-integer conversions
ACPICA: acpi_help: Update UUID list
ACPICA: Add predefined names found in the SMBus sepcification
ACPICA: Tree-wide: fix various typos and spelling mistakes
ACPICA: Drop the repeated word "an" in a comment
ACPICA: Add support for 64 bit risc-v compilation
ACPI: button: fix handling lid state changes when input device closed
tools/power/acpi: Serialize Makefile
ACPI: scan: Replace ACPI_DEBUG_PRINT() with pr_debug()
ACPI: memhotplug: Remove 'state' from struct acpi_memory_device
ACPI / extlog: Check for RDMSR failure
ACPI: Make acpi_evaluate_dsm() prototype consistent
docs: mm: numaperf.rst Add brief description for access class 1.
node: Add access1 class to represent CPU to memory characteristics
ACPI: HMAT: Fix handling of changes from ACPI 6.2 to ACPI 6.3
ACPI: Let ACPI know we support Generic Initiator Affinity Structures
x86: Support Generic Initiator only proximity domains
...
2020-10-14 11:42:04 -07:00
|
|
|
int target_nid = pxm_to_node(target->memory_pxm);
|
2020-10-13 16:49:13 -07:00
|
|
|
|
2023-02-10 01:07:07 -08:00
|
|
|
hmem_register_resource(target_nid, res);
|
2020-10-13 16:49:13 -07:00
|
|
|
}
|
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
Memory that has been tagged EFI_MEMORY_SP, and has performance
properties described by the ACPI HMAT is expected to have an application
specific consumer.
Those consumers may want 100% of the memory capacity to be reserved from
any usage by the kernel. By default, with this enabling, a platform
device is created to represent this differentiated resource.
The device-dax "hmem" driver claims these devices by default and
provides an mmap interface for the target application. If the
administrator prefers, the hmem resource range can be made available to
the core-mm via the device-dax hotplug facility, kmem, to online the
memory with its own numa node.
This was tested with an emulated HMAT produced by qemu (with the pending
HMAT enabling patches), and "efi_fake_mem=8G@9G:0x40000" on the kernel
command line to mark the memory ranges associated with node2 and node3
as EFI_MEMORY_SP.
qemu numa configuration options:
-numa node,mem=4G,cpus=0-19,nodeid=0
-numa node,mem=4G,cpus=20-39,nodeid=1
-numa node,mem=4G,nodeid=2
-numa node,mem=4G,nodeid=3
-numa dist,src=0,dst=0,val=10
-numa dist,src=0,dst=1,val=21
-numa dist,src=0,dst=2,val=21
-numa dist,src=0,dst=3,val=21
-numa dist,src=1,dst=0,val=21
-numa dist,src=1,dst=1,val=10
-numa dist,src=1,dst=2,val=21
-numa dist,src=1,dst=3,val=21
-numa dist,src=2,dst=0,val=21
-numa dist,src=2,dst=1,val=21
-numa dist,src=2,dst=2,val=10
-numa dist,src=2,dst=3,val=21
-numa dist,src=3,dst=0,val=21
-numa dist,src=3,dst=1,val=21
-numa dist,src=3,dst=2,val=21
-numa dist,src=3,dst=3,val=10
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
Result:
[
{
"path":"\/platform\/hmem.1",
"id":1,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax1.0",
"size":"4.00 GiB (4.29 GB)"
}
]
},
{
"path":"\/platform\/hmem.0",
"id":0,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax0.0",
"size":"4.00 GiB (4.29 GB)"
}
]
}
]
[..]
240000000-43fffffff : Soft Reserved
240000000-33fffffff : hmem.0
240000000-33fffffff : dax0.0
340000000-43fffffff : hmem.1
340000000-43fffffff : dax1.0
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-11-06 17:43:55 -08:00
|
|
|
}
|
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
/*
 * Register a single memory target with the node interfaces: hmem
 * devices first (node may be offline), then generic-port perf data,
 * then — only for online nodes — initiators, caches and perf classes.
 * target_lock serializes the registered/attribute updates; note the
 * two separate critical sections with an early return in between.
 */
static void hmat_register_target(struct memory_target *target)
{
	int nid = pxm_to_node(target->memory_pxm);

	/*
	 * Devices may belong to either an offline or online
	 * node, so unconditionally add them.
	 */
	hmat_register_target_devices(target);

	/*
	 * Register generic port perf numbers. The nid may not be
	 * initialized and is still NUMA_NO_NODE.
	 */
	mutex_lock(&target_lock);
	/*
	 * A non-empty device handle (first 16 bits non-zero) marks this
	 * target as a generic port sink — presumably the handle is only
	 * populated for generic port entries; confirm against the SRAT
	 * parsing code.
	 */
	if (*(u16 *)target->gen_port_device_handle) {
		hmat_update_generic_target(target);
		target->registered = true;
	}
	mutex_unlock(&target_lock);

	/*
	 * Skip offline nodes. This can happen when memory
	 * marked EFI_MEMORY_SP, "specific purpose", is applied
	 * to all the memory in a proximity domain leading to
	 * the node being marked offline / unplugged, or if
	 * memory-only "hotplug" node is offline.
	 */
	if (nid == NUMA_NO_NODE || !node_online(nid))
		return;

	/* Full registration happens at most once per target. */
	mutex_lock(&target_lock);
	if (!target->registered) {
		hmat_register_target_initiators(target);
		hmat_register_target_cache(target);
		hmat_register_target_perf(target, ACCESS_COORDINATE_LOCAL);
		hmat_register_target_perf(target, ACCESS_COORDINATE_CPU);
		target->registered = true;
	}
	mutex_unlock(&target_lock);
}
|
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
static void hmat_register_targets(void)
|
2019-03-11 14:56:03 -06:00
|
|
|
{
|
|
|
|
struct memory_target *target;
|
|
|
|
|
2019-08-05 08:27:04 -06:00
|
|
|
list_for_each_entry(target, &targets, node)
|
|
|
|
hmat_register_target(target);
|
2019-03-11 14:56:03 -06:00
|
|
|
}
|
|
|
|
|
2019-08-05 08:27:05 -06:00
|
|
|
static int hmat_callback(struct notifier_block *self,
|
|
|
|
unsigned long action, void *arg)
|
|
|
|
{
|
|
|
|
struct memory_target *target;
|
|
|
|
struct memory_notify *mnb = arg;
|
|
|
|
int pxm, nid = mnb->status_change_nid;
|
|
|
|
|
|
|
|
if (nid == NUMA_NO_NODE || action != MEM_ONLINE)
|
|
|
|
return NOTIFY_OK;
|
|
|
|
|
|
|
|
pxm = node_to_pxm(nid);
|
|
|
|
target = find_mem_target(pxm);
|
|
|
|
if (!target)
|
|
|
|
return NOTIFY_OK;
|
|
|
|
|
|
|
|
hmat_register_target(target);
|
|
|
|
return NOTIFY_OK;
|
|
|
|
}
|
|
|
|
|
- 875fa64577da ("mm/hugetlb_vmemmap: fix race with speculative PFN
walkers") is known to cause a performance regression
(https://lore.kernel.org/all/3acefad9-96e5-4681-8014-827d6be71c7a@linux.ibm.com/T/#mfa809800a7862fb5bdf834c6f71a3a5113eb83ff).
Yu has a fix which I'll send along later via the hotfixes branch.
- In the series "mm: Avoid possible overflows in dirty throttling" Jan
Kara addresses a couple of issues in the writeback throttling code.
These fixes are also targeted at -stable kernels.
- Ryusuke Konishi's series "nilfs2: fix potential issues related to
reserved inodes" does that. This should actually be in the
mm-nonmm-stable tree, along with the many other nilfs2 patches. My bad.
- More folio conversions from Kefeng Wang in the series "mm: convert to
folio_alloc_mpol()"
- Kemeng Shi has sent some cleanups to the writeback code in the series
"Add helper functions to remove repeated code and improve readability of
cgroup writeback"
- Kairui Song has made the swap code a little smaller and a little
faster in the series "mm/swap: clean up and optimize swap cache index".
- In the series "mm/memory: cleanly support zeropage in
vm_insert_page*(), vm_map_pages*() and vmf_insert_mixed()" David
Hildenbrand has reworked the rather sketchy handling of the use of the
zeropage in MAP_SHARED mappings. I don't see any runtime effects here -
more a cleanup/understandability/maintainablity thing.
- Dev Jain has improved selftests/mm/va_high_addr_switch.c's handling of
higher addresses, for aarch64. The (poorly named) series is
"Restructure va_high_addr_switch".
- The core TLB handling code gets some cleanups and possible slight
optimizations in Bang Li's series "Add update_mmu_tlb_range() to
simplify code".
- Jane Chu has improved the handling of our
fake-an-unrecoverable-memory-error testing feature MADV_HWPOISON in the
series "Enhance soft hwpoison handling and injection".
- Jeff Johnson has sent a billion patches everywhere to add
MODULE_DESCRIPTION() to everything. Some landed in this pull.
- In the series "mm: cleanup MIGRATE_SYNC_NO_COPY mode", Kefeng Wang has
simplified migration's use of hardware-offload memory copying.
- Yosry Ahmed performs more folio API conversions in his series "mm:
zswap: trivial folio conversions".
- In the series "large folios swap-in: handle refault cases first",
Chuanhua Han inches us forward in the handling of large pages in the
swap code. This is a cleanup and optimization, working toward the end
objective of full support of large folio swapin/out.
- In the series "mm,swap: cleanup VMA based swap readahead window
calculation", Huang Ying has contributed some cleanups and a possible
fixlet to his VMA based swap readahead code.
- In the series "add mTHP support for anonymous shmem" Baolin Wang has
taught anonymous shmem mappings to use multisize THP. By default this
is a no-op - users must opt in vis sysfs controls. Dramatic
improvements in pagefault latency are realized.
- David Hildenbrand has some cleanups to our remaining use of
page_mapcount() in the series "fs/proc: move page_mapcount() to
fs/proc/internal.h".
- David also has some highmem accounting cleanups in the series
"mm/highmem: don't track highmem pages manually".
- Build-time fixes and cleanups from John Hubbard in the series
"cleanups, fixes, and progress towards avoiding "make headers"".
- Cleanups and consolidation of the core pagemap handling from Barry
Song in the series "mm: introduce pmd|pte_needs_soft_dirty_wp helpers
and utilize them".
- Lance Yang's series "Reclaim lazyfree THP without splitting" has
reduced the latency of the reclaim of pmd-mapped THPs under fairly
common circumstances. A 10x speedup is seen in a microbenchmark.
It does this by punting to another CPU but I guess that's a win unless
all CPUs are pegged.
- hugetlb_cgroup cleanups from Xiu Jianfeng in the series
"mm/hugetlb_cgroup: rework on cftypes".
- Miaohe Lin's series "Some cleanups for memory-failure" does just that
thing.
- Is anyone reading this stuff? If so, email me!
- Someone other than SeongJae has developed a DAMON feature in Honggyu
Kim's series "DAMON based tiered memory management for CXL memory".
This adds DAMON features which may be used to help determine the
efficiency of our placement of CXL/PCIe attached DRAM.
- DAMON user API centralization and simplificatio work in SeongJae
Park's series "mm/damon: introduce DAMON parameters online commit
function".
- In the series "mm: page_type, zsmalloc and page_mapcount_reset()"
David Hildenbrand does some maintenance work on zsmalloc - partially
modernizing its use of pageframe fields.
- Kefeng Wang provides more folio conversions in the series "mm: remove
page_maybe_dma_pinned() and page_mkclean()".
- More cleanup from David Hildenbrand, this time in the series
"mm/memory_hotplug: use PageOffline() instead of PageReserved() for
!ZONE_DEVICE". It "enlightens memory hotplug more about PageOffline()
pages" and permits the removal of some virtio-mem hacks.
- Barry Song's series "mm: clarify folio_add_new_anon_rmap() and
__folio_add_anon_rmap()" is a cleanup to the anon folio handling in
preparation for mTHP (multisize THP) swapin.
- Kefeng Wang's series "mm: improve clear and copy user folio"
implements more folio conversions, this time in the area of large folio
userspace copying.
- The series "Docs/mm/damon/maintaier-profile: document a mailing tool
and community meetup series" tells people how to get better involved
with other DAMON developers. From SeongJae Park.
- A large series ("kmsan: Enable on s390") from Ilya Leoshkevich does
that.
- David Hildenbrand sends along more cleanups, this time against the
migration code. The series is "mm/migrate: move NUMA hinting fault
folio isolation + checks under PTL".
- Jan Kara has found quite a lot of strangenesses and minor errors in
the readahead code. He addresses this in the series "mm: Fix various
readahead quirks".
- SeongJae Park's series "selftests/damon: test DAMOS tried regions and
{min,max}_nr_regions" adds features and addresses errors in DAMON's self
testing code.
- Gavin Shan has found a userspace-triggerable WARN in the pagecache
code. The series "mm/filemap: Limit page cache size to that supported
by xarray" addresses this. The series is marked cc:stable.
- Chengming Zhou's series "mm/ksm: cmp_and_merge_page() optimizations
and cleanup" cleans up and slightly optimizes KSM.
- Roman Gushchin has separated the memcg-v1 and memcg-v2 code - lots of
code motion. The series (which also makes the memcg-v1 code
Kconfigurable) are
"mm: memcg: separate legacy cgroup v1 code and put under config
option" and
"mm: memcg: put cgroup v1-specific memcg data under CONFIG_MEMCG_V1"
- Dan Schatzberg's series "Add swappiness argument to memory.reclaim"
adds an additional feature to this cgroup-v2 control file.
- The series "Userspace controls soft-offline pages" from Jiaqi Yan
permits userspace to stop the kernel's automatic treatment of excessive
correctable memory errors. In order to permit userspace to monitor and
handle this situation.
- Kefeng Wang's series "mm: migrate: support poison recover from migrate
folio" teaches the kernel to appropriately handle migration from
poisoned source folios rather than simply panicing.
- SeongJae Park's series "Docs/damon: minor fixups and improvements"
does those things.
- In the series "mm/zsmalloc: change back to per-size_class lock"
Chengming Zhou improves zsmalloc's scalability and memory utilization.
- Vivek Kasireddy's series "mm/gup: Introduce memfd_pin_folios() for
pinning memfd folios" makes the GUP code use FOLL_PIN rather than bare
refcount increments. So these pages can first be moved aside if they
reside in the movable zone or a CMA block.
- Andrii Nakryiko has added a binary ioctl()-based API to /proc/pid/maps
for much faster reading of vma information. The series is "query VMAs
from /proc/<pid>/maps".
- In the series "mm: introduce per-order mTHP split counters" Lance Yang
improves the kernel's presentation of developer information related to
multisize THP splitting.
- Michael Ellerman has developed the series "Reimplement huge pages
without hugepd on powerpc (8xx, e500, book3s/64)". This permits
userspace to use all available huge page sizes.
- In the series "revert unconditional slab and page allocator fault
injection calls" Vlastimil Babka removes a performance-affecting and not
very useful feature from slab fault injection.
-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQTTMBEPP41GrTpTJgfdBJ7gKXxAjgUCZp2C+QAKCRDdBJ7gKXxA
joTkAQDvjqOoFStqk4GU3OXMYB7WCU/ZQMFG0iuu1EEwTVDZ4QEA8CnG7seek1R3
xEoo+vw0sWWeLV3qzsxnCA1BJ8cTJA8=
=z0Lf
-----END PGP SIGNATURE-----
Merge tag 'mm-stable-2024-07-21-14-50' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
- In the series "mm: Avoid possible overflows in dirty throttling" Jan
Kara addresses a couple of issues in the writeback throttling code.
These fixes are also targeted at -stable kernels.
- Ryusuke Konishi's series "nilfs2: fix potential issues related to
reserved inodes" does that. This should actually be in the
mm-nonmm-stable tree, along with the many other nilfs2 patches. My
bad.
- More folio conversions from Kefeng Wang in the series "mm: convert to
folio_alloc_mpol()"
- Kemeng Shi has sent some cleanups to the writeback code in the series
"Add helper functions to remove repeated code and improve readability
of cgroup writeback"
- Kairui Song has made the swap code a little smaller and a little
faster in the series "mm/swap: clean up and optimize swap cache
index".
- In the series "mm/memory: cleanly support zeropage in
vm_insert_page*(), vm_map_pages*() and vmf_insert_mixed()" David
Hildenbrand has reworked the rather sketchy handling of the use of
the zeropage in MAP_SHARED mappings. I don't see any runtime effects
here - more a cleanup/understandability/maintainability thing.
- Dev Jain has improved selftests/mm/va_high_addr_switch.c's handling
of higher addresses, for aarch64. The (poorly named) series is
"Restructure va_high_addr_switch".
- The core TLB handling code gets some cleanups and possible slight
optimizations in Bang Li's series "Add update_mmu_tlb_range() to
simplify code".
- Jane Chu has improved the handling of our
fake-an-unrecoverable-memory-error testing feature MADV_HWPOISON in
the series "Enhance soft hwpoison handling and injection".
- Jeff Johnson has sent a billion patches everywhere to add
MODULE_DESCRIPTION() to everything. Some landed in this pull.
- In the series "mm: cleanup MIGRATE_SYNC_NO_COPY mode", Kefeng Wang
has simplified migration's use of hardware-offload memory copying.
- Yosry Ahmed performs more folio API conversions in his series "mm:
zswap: trivial folio conversions".
- In the series "large folios swap-in: handle refault cases first",
Chuanhua Han inches us forward in the handling of large pages in the
swap code. This is a cleanup and optimization, working toward the end
objective of full support of large folio swapin/out.
- In the series "mm,swap: cleanup VMA based swap readahead window
calculation", Huang Ying has contributed some cleanups and a possible
fixlet to his VMA based swap readahead code.
- In the series "add mTHP support for anonymous shmem" Baolin Wang has
taught anonymous shmem mappings to use multisize THP. By default this
is a no-op - users must opt in via sysfs controls. Dramatic
improvements in pagefault latency are realized.
- David Hildenbrand has some cleanups to our remaining use of
page_mapcount() in the series "fs/proc: move page_mapcount() to
fs/proc/internal.h".
- David also has some highmem accounting cleanups in the series
"mm/highmem: don't track highmem pages manually".
- Build-time fixes and cleanups from John Hubbard in the series
"cleanups, fixes, and progress towards avoiding "make headers"".
- Cleanups and consolidation of the core pagemap handling from Barry
Song in the series "mm: introduce pmd|pte_needs_soft_dirty_wp helpers
and utilize them".
- Lance Yang's series "Reclaim lazyfree THP without splitting" has
reduced the latency of the reclaim of pmd-mapped THPs under fairly
common circumstances. A 10x speedup is seen in a microbenchmark.
It does this by punting to another CPU but I guess that's a win unless
all CPUs are pegged.
- hugetlb_cgroup cleanups from Xiu Jianfeng in the series
"mm/hugetlb_cgroup: rework on cftypes".
- Miaohe Lin's series "Some cleanups for memory-failure" does just that
thing.
- Someone other than SeongJae has developed a DAMON feature in Honggyu
Kim's series "DAMON based tiered memory management for CXL memory".
This adds DAMON features which may be used to help determine the
efficiency of our placement of CXL/PCIe attached DRAM.
- DAMON user API centralization and simplification work in SeongJae
Park's series "mm/damon: introduce DAMON parameters online commit
function".
- In the series "mm: page_type, zsmalloc and page_mapcount_reset()"
David Hildenbrand does some maintenance work on zsmalloc - partially
modernizing its use of pageframe fields.
- Kefeng Wang provides more folio conversions in the series "mm: remove
page_maybe_dma_pinned() and page_mkclean()".
- More cleanup from David Hildenbrand, this time in the series
"mm/memory_hotplug: use PageOffline() instead of PageReserved() for
!ZONE_DEVICE". It "enlightens memory hotplug more about PageOffline()
pages" and permits the removal of some virtio-mem hacks.
- Barry Song's series "mm: clarify folio_add_new_anon_rmap() and
__folio_add_anon_rmap()" is a cleanup to the anon folio handling in
preparation for mTHP (multisize THP) swapin.
- Kefeng Wang's series "mm: improve clear and copy user folio"
implements more folio conversions, this time in the area of large
folio userspace copying.
- The series "Docs/mm/damon/maintaier-profile: document a mailing tool
and community meetup series" tells people how to get better involved
with other DAMON developers. From SeongJae Park.
- A large series ("kmsan: Enable on s390") from Ilya Leoshkevich does
that.
- David Hildenbrand sends along more cleanups, this time against the
migration code. The series is "mm/migrate: move NUMA hinting fault
folio isolation + checks under PTL".
- Jan Kara has found quite a lot of strangenesses and minor errors in
the readahead code. He addresses this in the series "mm: Fix various
readahead quirks".
- SeongJae Park's series "selftests/damon: test DAMOS tried regions and
{min,max}_nr_regions" adds features and addresses errors in DAMON's
self testing code.
- Gavin Shan has found a userspace-triggerable WARN in the pagecache
code. The series "mm/filemap: Limit page cache size to that supported
by xarray" addresses this. The series is marked cc:stable.
- Chengming Zhou's series "mm/ksm: cmp_and_merge_page() optimizations
and cleanup" cleans up and slightly optimizes KSM.
- Roman Gushchin has separated the memcg-v1 and memcg-v2 code - lots of
code motion. The series (which also makes the memcg-v1 code
Kconfigurable) are "mm: memcg: separate legacy cgroup v1 code and put
under config option" and "mm: memcg: put cgroup v1-specific memcg
data under CONFIG_MEMCG_V1"
- Dan Schatzberg's series "Add swappiness argument to memory.reclaim"
adds an additional feature to this cgroup-v2 control file.
- The series "Userspace controls soft-offline pages" from Jiaqi Yan
permits userspace to stop the kernel's automatic treatment of
excessive correctable memory errors. In order to permit userspace to
monitor and handle this situation.
- Kefeng Wang's series "mm: migrate: support poison recover from
migrate folio" teaches the kernel to appropriately handle migration
from poisoned source folios rather than simply panicking.
- SeongJae Park's series "Docs/damon: minor fixups and improvements"
does those things.
- In the series "mm/zsmalloc: change back to per-size_class lock"
Chengming Zhou improves zsmalloc's scalability and memory
utilization.
- Vivek Kasireddy's series "mm/gup: Introduce memfd_pin_folios() for
pinning memfd folios" makes the GUP code use FOLL_PIN rather than
bare refcount increments. So these pages can first be moved aside if
they reside in the movable zone or a CMA block.
- Andrii Nakryiko has added a binary ioctl()-based API to
/proc/pid/maps for much faster reading of vma information. The series
is "query VMAs from /proc/<pid>/maps".
- In the series "mm: introduce per-order mTHP split counters" Lance
Yang improves the kernel's presentation of developer information
related to multisize THP splitting.
- Michael Ellerman has developed the series "Reimplement huge pages
without hugepd on powerpc (8xx, e500, book3s/64)". This permits
userspace to use all available huge page sizes.
- In the series "revert unconditional slab and page allocator fault
injection calls" Vlastimil Babka removes a performance-affecting and
not very useful feature from slab fault injection.
* tag 'mm-stable-2024-07-21-14-50' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (411 commits)
mm/mglru: fix ineffective protection calculation
mm/zswap: fix a white space issue
mm/hugetlb: fix kernel NULL pointer dereference when migrating hugetlb folio
mm/hugetlb: fix possible recursive locking detected warning
mm/gup: clear the LRU flag of a page before adding to LRU batch
mm/numa_balancing: teach mpol_to_str about the balancing mode
mm: memcg1: convert charge move flags to unsigned long long
alloc_tag: fix page_ext_get/page_ext_put sequence during page splitting
lib: reuse page_ext_data() to obtain codetag_ref
lib: add missing newline character in the warning message
mm/mglru: fix overshooting shrinker memory
mm/mglru: fix div-by-zero in vmpressure_calc_level()
mm/kmemleak: replace strncpy() with strscpy()
mm, page_alloc: put should_fail_alloc_page() back behing CONFIG_FAIL_PAGE_ALLOC
mm, slab: put should_failslab() back behind CONFIG_SHOULD_FAILSLAB
mm: ignore data-race in __swap_writepage
hugetlbfs: ensure generic_hugetlb_get_unmapped_area() returns higher address than mmap_min_addr
mm: shmem: rename mTHP shmem counters
mm: swap_state: use folio_alloc_mpol() in __read_swap_cache_async()
mm/migrate: putback split folios when numa hint migration fails
...
2024-07-21 17:15:46 -07:00
|
|
|
/*
 * Record the CPU-access performance of the default DRAM nodes with the
 * memory-tiering core, so that abstract distances of other (e.g. CXL)
 * memory can be computed relative to plain DRAM.
 *
 * Iterates every node in default_dram_nodes; nodes without an HMAT
 * memory target are silently skipped.
 *
 * Returns 0 on success, or the first non-zero error from
 * mt_set_default_dram_perf() (iteration stops at the first failure).
 */
static int __init hmat_set_default_dram_perf(void)
{
	int rc;
	int nid, pxm;
	struct memory_target *target;
	struct access_coordinate *attrs;

	for_each_node_mask(nid, default_dram_nodes) {
		pxm = node_to_pxm(nid);
		target = find_mem_target(pxm);
		if (!target)
			continue;	/* node has no HMAT-described target */

		/* Use the CPU-initiator access coordinates as DRAM baseline. */
		attrs = &target->coord[ACCESS_COORDINATE_CPU];
		rc = mt_set_default_dram_perf(nid, attrs, "ACPI HMAT");
		if (rc)
			return rc;
	}

	return 0;
}
|
|
|
|
|
|
|
|
static int hmat_calculate_adistance(struct notifier_block *self,
|
|
|
|
unsigned long nid, void *data)
|
|
|
|
{
|
|
|
|
static DECLARE_BITMAP(p_nodes, MAX_NUMNODES);
|
|
|
|
struct memory_target *target;
|
2023-12-21 15:02:37 -07:00
|
|
|
struct access_coordinate *perf;
|
2023-09-26 14:06:27 +08:00
|
|
|
int *adist = data;
|
|
|
|
int pxm;
|
|
|
|
|
|
|
|
pxm = node_to_pxm(nid);
|
|
|
|
target = find_mem_target(pxm);
|
|
|
|
if (!target)
|
|
|
|
return NOTIFY_OK;
|
|
|
|
|
|
|
|
mutex_lock(&target_lock);
|
2024-03-08 14:59:21 -07:00
|
|
|
hmat_update_target_attrs(target, p_nodes, ACCESS_COORDINATE_CPU);
|
2023-09-26 14:06:27 +08:00
|
|
|
mutex_unlock(&target_lock);
|
|
|
|
|
2024-06-06 10:28:45 +08:00
|
|
|
perf = &target->coord[ACCESS_COORDINATE_CPU];
|
2023-09-26 14:06:27 +08:00
|
|
|
|
|
|
|
if (mt_perf_to_adistance(perf, adist))
|
|
|
|
return NOTIFY_OK;
|
|
|
|
|
|
|
|
return NOTIFY_STOP;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Notifier registered with the memory-tiering core so HMAT data can
 * supply abstract distances. Priority 100 lets it run ahead of
 * lower-priority default algorithms.
 */
static struct notifier_block hmat_adist_nb __meminitdata = {
	.notifier_call = hmat_calculate_adistance,
	.priority = 100,
};
|
|
|
|
|
2019-03-11 14:56:03 -06:00
|
|
|
static __init void hmat_free_structures(void)
|
|
|
|
{
|
|
|
|
struct memory_target *target, *tnext;
|
|
|
|
struct memory_locality *loc, *lnext;
|
|
|
|
struct memory_initiator *initiator, *inext;
|
2019-08-05 08:27:04 -06:00
|
|
|
struct target_cache *tcache, *cnext;
|
2019-03-11 14:56:03 -06:00
|
|
|
|
|
|
|
list_for_each_entry_safe(target, tnext, &targets, node) {
|
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
Memory that has been tagged EFI_MEMORY_SP, and has performance
properties described by the ACPI HMAT is expected to have an application
specific consumer.
Those consumers may want 100% of the memory capacity to be reserved from
any usage by the kernel. By default, with this enabling, a platform
device is created to represent this differentiated resource.
The device-dax "hmem" driver claims these devices by default and
provides an mmap interface for the target application. If the
administrator prefers, the hmem resource range can be made available to
the core-mm via the device-dax hotplug facility, kmem, to online the
memory with its own numa node.
This was tested with an emulated HMAT produced by qemu (with the pending
HMAT enabling patches), and "efi_fake_mem=8G@9G:0x40000" on the kernel
command line to mark the memory ranges associated with node2 and node3
as EFI_MEMORY_SP.
qemu numa configuration options:
-numa node,mem=4G,cpus=0-19,nodeid=0
-numa node,mem=4G,cpus=20-39,nodeid=1
-numa node,mem=4G,nodeid=2
-numa node,mem=4G,nodeid=3
-numa dist,src=0,dst=0,val=10
-numa dist,src=0,dst=1,val=21
-numa dist,src=0,dst=2,val=21
-numa dist,src=0,dst=3,val=21
-numa dist,src=1,dst=0,val=21
-numa dist,src=1,dst=1,val=10
-numa dist,src=1,dst=2,val=21
-numa dist,src=1,dst=3,val=21
-numa dist,src=2,dst=0,val=21
-numa dist,src=2,dst=1,val=21
-numa dist,src=2,dst=2,val=10
-numa dist,src=2,dst=3,val=21
-numa dist,src=3,dst=0,val=21
-numa dist,src=3,dst=1,val=21
-numa dist,src=3,dst=2,val=21
-numa dist,src=3,dst=3,val=10
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
Result:
[
{
"path":"\/platform\/hmem.1",
"id":1,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax1.0",
"size":"4.00 GiB (4.29 GB)"
}
]
},
{
"path":"\/platform\/hmem.0",
"id":0,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax0.0",
"size":"4.00 GiB (4.29 GB)"
}
]
}
]
[..]
240000000-43fffffff : Soft Reserved
240000000-33fffffff : hmem.0
240000000-33fffffff : dax0.0
340000000-43fffffff : hmem.1
340000000-43fffffff : dax1.0
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-11-06 17:43:55 -08:00
|
|
|
struct resource *res, *res_next;
|
|
|
|
|
2019-08-05 08:27:04 -06:00
|
|
|
list_for_each_entry_safe(tcache, cnext, &target->caches, node) {
|
|
|
|
list_del(&tcache->node);
|
|
|
|
kfree(tcache);
|
|
|
|
}
|
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
Memory that has been tagged EFI_MEMORY_SP, and has performance
properties described by the ACPI HMAT is expected to have an application
specific consumer.
Those consumers may want 100% of the memory capacity to be reserved from
any usage by the kernel. By default, with this enabling, a platform
device is created to represent this differentiated resource.
The device-dax "hmem" driver claims these devices by default and
provides an mmap interface for the target application. If the
administrator prefers, the hmem resource range can be made available to
the core-mm via the device-dax hotplug facility, kmem, to online the
memory with its own numa node.
This was tested with an emulated HMAT produced by qemu (with the pending
HMAT enabling patches), and "efi_fake_mem=8G@9G:0x40000" on the kernel
command line to mark the memory ranges associated with node2 and node3
as EFI_MEMORY_SP.
qemu numa configuration options:
-numa node,mem=4G,cpus=0-19,nodeid=0
-numa node,mem=4G,cpus=20-39,nodeid=1
-numa node,mem=4G,nodeid=2
-numa node,mem=4G,nodeid=3
-numa dist,src=0,dst=0,val=10
-numa dist,src=0,dst=1,val=21
-numa dist,src=0,dst=2,val=21
-numa dist,src=0,dst=3,val=21
-numa dist,src=1,dst=0,val=21
-numa dist,src=1,dst=1,val=10
-numa dist,src=1,dst=2,val=21
-numa dist,src=1,dst=3,val=21
-numa dist,src=2,dst=0,val=21
-numa dist,src=2,dst=1,val=21
-numa dist,src=2,dst=2,val=10
-numa dist,src=2,dst=3,val=21
-numa dist,src=3,dst=0,val=21
-numa dist,src=3,dst=1,val=21
-numa dist,src=3,dst=2,val=21
-numa dist,src=3,dst=3,val=10
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
Result:
[
{
"path":"\/platform\/hmem.1",
"id":1,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax1.0",
"size":"4.00 GiB (4.29 GB)"
}
]
},
{
"path":"\/platform\/hmem.0",
"id":0,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax0.0",
"size":"4.00 GiB (4.29 GB)"
}
]
}
]
[..]
240000000-43fffffff : Soft Reserved
240000000-33fffffff : hmem.0
240000000-33fffffff : dax0.0
340000000-43fffffff : hmem.1
340000000-43fffffff : dax1.0
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-11-06 17:43:55 -08:00
|
|
|
|
2019-03-11 14:56:03 -06:00
|
|
|
list_del(&target->node);
|
ACPI: NUMA: HMAT: Register "soft reserved" memory as an "hmem" device
Memory that has been tagged EFI_MEMORY_SP, and has performance
properties described by the ACPI HMAT is expected to have an application
specific consumer.
Those consumers may want 100% of the memory capacity to be reserved from
any usage by the kernel. By default, with this enabling, a platform
device is created to represent this differentiated resource.
The device-dax "hmem" driver claims these devices by default and
provides an mmap interface for the target application. If the
administrator prefers, the hmem resource range can be made available to
the core-mm via the device-dax hotplug facility, kmem, to online the
memory with its own numa node.
This was tested with an emulated HMAT produced by qemu (with the pending
HMAT enabling patches), and "efi_fake_mem=8G@9G:0x40000" on the kernel
command line to mark the memory ranges associated with node2 and node3
as EFI_MEMORY_SP.
qemu numa configuration options:
-numa node,mem=4G,cpus=0-19,nodeid=0
-numa node,mem=4G,cpus=20-39,nodeid=1
-numa node,mem=4G,nodeid=2
-numa node,mem=4G,nodeid=3
-numa dist,src=0,dst=0,val=10
-numa dist,src=0,dst=1,val=21
-numa dist,src=0,dst=2,val=21
-numa dist,src=0,dst=3,val=21
-numa dist,src=1,dst=0,val=21
-numa dist,src=1,dst=1,val=10
-numa dist,src=1,dst=2,val=21
-numa dist,src=1,dst=3,val=21
-numa dist,src=2,dst=0,val=21
-numa dist,src=2,dst=1,val=21
-numa dist,src=2,dst=2,val=10
-numa dist,src=2,dst=3,val=21
-numa dist,src=3,dst=0,val=21
-numa dist,src=3,dst=1,val=21
-numa dist,src=3,dst=2,val=21
-numa dist,src=3,dst=3,val=10
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=0,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=0,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=0,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=0,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-latency,base-lat=10,latency=10
-numa hmat-lb,initiator=1,target=0,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=10
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-latency,base-lat=10,latency=5
-numa hmat-lb,initiator=1,target=1,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=5
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-latency,base-lat=10,latency=15
-numa hmat-lb,initiator=1,target=2,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=15
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-latency,base-lat=10,latency=20
-numa hmat-lb,initiator=1,target=3,hierarchy=memory,data-type=access-bandwidth,base-bw=20,bandwidth=20
Result:
[
{
"path":"\/platform\/hmem.1",
"id":1,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax1.0",
"size":"4.00 GiB (4.29 GB)"
}
]
},
{
"path":"\/platform\/hmem.0",
"id":0,
"size":"4.00 GiB (4.29 GB)",
"align":2097152,
"devices":[
{
"chardev":"dax0.0",
"size":"4.00 GiB (4.29 GB)"
}
]
}
]
[..]
240000000-43fffffff : Soft Reserved
240000000-33fffffff : hmem.0
240000000-33fffffff : dax0.0
340000000-43fffffff : hmem.1
340000000-43fffffff : dax1.0
Reviewed-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
2019-11-06 17:43:55 -08:00
|
|
|
res = target->memregions.child;
|
|
|
|
while (res) {
|
|
|
|
res_next = res->sibling;
|
|
|
|
__release_region(&target->memregions, res->start,
|
|
|
|
resource_size(res));
|
|
|
|
res = res_next;
|
|
|
|
}
|
2019-03-11 14:56:03 -06:00
|
|
|
kfree(target);
|
|
|
|
}
|
|
|
|
|
|
|
|
list_for_each_entry_safe(initiator, inext, &initiators, node) {
|
|
|
|
list_del(&initiator->node);
|
|
|
|
kfree(initiator);
|
|
|
|
}
|
|
|
|
|
|
|
|
list_for_each_entry_safe(loc, lnext, &localities, node) {
|
|
|
|
list_del(&loc->node);
|
|
|
|
kfree(loc);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-03-11 14:55:59 -06:00
|
|
|
static __init int hmat_init(void)
|
|
|
|
{
|
|
|
|
struct acpi_table_header *tbl;
|
|
|
|
enum acpi_hmat_type i;
|
|
|
|
acpi_status status;
|
|
|
|
|
2020-10-13 16:49:02 -07:00
|
|
|
if (srat_disabled() || hmat_disable)
|
2019-03-11 14:55:59 -06:00
|
|
|
return 0;
|
|
|
|
|
2019-03-11 14:56:03 -06:00
|
|
|
status = acpi_get_table(ACPI_SIG_SRAT, 0, &tbl);
|
|
|
|
if (ACPI_FAILURE(status))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
if (acpi_table_parse_entries(ACPI_SIG_SRAT,
|
|
|
|
sizeof(struct acpi_table_srat),
|
|
|
|
ACPI_SRAT_TYPE_MEMORY_AFFINITY,
|
|
|
|
srat_parse_mem_affinity, 0) < 0)
|
|
|
|
goto out_put;
|
2023-12-21 15:02:49 -07:00
|
|
|
|
|
|
|
if (acpi_table_parse_entries(ACPI_SIG_SRAT,
|
|
|
|
sizeof(struct acpi_table_srat),
|
|
|
|
ACPI_SRAT_TYPE_GENERIC_PORT_AFFINITY,
|
|
|
|
srat_parse_genport_affinity, 0) < 0)
|
|
|
|
goto out_put;
|
|
|
|
|
2019-03-11 14:56:03 -06:00
|
|
|
acpi_put_table(tbl);
|
|
|
|
|
2019-03-11 14:55:59 -06:00
|
|
|
status = acpi_get_table(ACPI_SIG_HMAT, 0, &tbl);
|
|
|
|
if (ACPI_FAILURE(status))
|
2019-04-09 22:14:50 -04:00
|
|
|
goto out_put;
|
2019-03-11 14:55:59 -06:00
|
|
|
|
|
|
|
hmat_revision = tbl->revision;
|
|
|
|
switch (hmat_revision) {
|
|
|
|
case 1:
|
|
|
|
case 2:
|
|
|
|
break;
|
|
|
|
default:
|
2022-09-09 17:56:24 +08:00
|
|
|
pr_notice("Ignoring: Unknown revision:%d\n", hmat_revision);
|
2019-03-11 14:55:59 -06:00
|
|
|
goto out_put;
|
|
|
|
}
|
|
|
|
|
2019-04-17 11:13:10 -07:00
|
|
|
for (i = ACPI_HMAT_TYPE_PROXIMITY; i < ACPI_HMAT_TYPE_RESERVED; i++) {
|
2019-03-11 14:55:59 -06:00
|
|
|
if (acpi_table_parse_entries(ACPI_SIG_HMAT,
|
|
|
|
sizeof(struct acpi_table_hmat), i,
|
|
|
|
hmat_parse_subtable, 0) < 0) {
|
2022-09-09 17:56:24 +08:00
|
|
|
pr_notice("Ignoring: Invalid table");
|
2019-03-11 14:55:59 -06:00
|
|
|
goto out_put;
|
|
|
|
}
|
|
|
|
}
|
2019-03-11 14:56:03 -06:00
|
|
|
hmat_register_targets();
|
2019-08-05 08:27:05 -06:00
|
|
|
|
|
|
|
/* Keep the table and structures if the notifier may use them */
|
2023-09-26 14:06:27 +08:00
|
|
|
if (hotplug_memory_notifier(hmat_callback, HMAT_CALLBACK_PRI))
|
|
|
|
goto out_put;
|
|
|
|
|
|
|
|
if (!hmat_set_default_dram_perf())
|
|
|
|
register_mt_adistance_algorithm(&hmat_adist_nb);
|
|
|
|
|
|
|
|
return 0;
|
2019-03-11 14:55:59 -06:00
|
|
|
out_put:
|
2019-03-11 14:56:03 -06:00
|
|
|
hmat_free_structures();
|
2019-03-11 14:55:59 -06:00
|
|
|
acpi_put_table(tbl);
|
|
|
|
return 0;
|
|
|
|
}
|
2023-02-10 01:06:51 -08:00
|
|
|
subsys_initcall(hmat_init);
|