// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 Linaro Ltd; <ard.biesheuvel@linaro.org>
 */
|
|
|
|
|
|
|
|
#include <linux/efi.h>
|
|
|
|
#include <linux/log2.h>
|
|
|
|
#include <asm/efi.h>
|
|
|
|
|
|
|
|
#include "efistub.h"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Return the number of slots covered by this entry, i.e., the number of
|
|
|
|
* addresses it covers that are suitably aligned and supply enough room
|
|
|
|
* for the allocation.
|
|
|
|
*/
|
|
|
|
static unsigned long get_entry_num_slots(efi_memory_desc_t *md,
|
|
|
|
unsigned long size,
|
2023-08-07 18:27:18 +02:00
|
|
|
unsigned long align_shift,
|
x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR
The EFI stub's kernel placement logic randomizes the physical placement
of the kernel by taking all available memory into account, and picking a
region at random, based on a random seed.
When KASLR is disabled, this seed is set to 0x0, and this results in the
lowest available region of memory to be selected for loading the kernel,
even if this is below LOAD_PHYSICAL_ADDR. Some of this memory is
typically reserved for the GFP_DMA region, to accommodate masters that
can only access the first 16 MiB of system memory.
Even if such devices are rare these days, we may still end up with a
warning in the kernel log, as reported by Tom:
swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0
Fix this by tweaking the random allocation logic to accept a low bound
on the placement, and set it to LOAD_PHYSICAL_ADDR.
Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
Reported-by: Tom Englund <tomenglund26@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218404
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
2024-01-30 19:01:35 +01:00
|
|
|
u64 alloc_min, u64 alloc_max)
|
2020-02-10 17:02:41 +01:00
|
|
|
{
|
|
|
|
unsigned long align = 1UL << align_shift;
|
|
|
|
u64 first_slot, last_slot, region_end;
|
|
|
|
|
|
|
|
if (md->type != EFI_CONVENTIONAL_MEMORY)
|
|
|
|
return 0;
|
|
|
|
|
efi: Avoid cold plugged memory for placing the kernel
UEFI 2.11 introduced EFI_MEMORY_HOT_PLUGGABLE to annotate system memory
regions that are 'cold plugged' at boot, i.e., hot pluggable memory that
is available from early boot, and described as system RAM by the
firmware.
Existing loaders and EFI applications running in the boot context will
happily use this memory for allocating data structures that cannot be
freed or moved at runtime, and this prevents the memory from being
unplugged. Going forward, the new EFI_MEMORY_HOT_PLUGGABLE attribute
should be tested, and memory annotated as such should be avoided for
such allocations.
In the EFI stub, there are a couple of occurrences where, instead of the
high-level AllocatePages() UEFI boot service, a low-level code sequence
is used that traverses the EFI memory map and carves out the requested
number of pages from a free region. This is needed, e.g., for allocating
as low as possible, or for allocating pages at random.
While AllocatePages() should presumably avoid special purpose memory and
cold plugged regions, this manual approach needs to incorporate this
logic itself, in order to prevent the kernel itself from ending up in a
hot unpluggable region, preventing it from being unplugged.
So add the EFI_MEMORY_HOTPLUGGABLE macro definition, and check for it
where appropriate.
Cc: stable@vger.kernel.org
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
2025-02-01 18:21:35 +01:00
|
|
|
if (md->attribute & EFI_MEMORY_HOT_PLUGGABLE)
|
|
|
|
return 0;
|
|
|
|
|
2020-02-10 17:02:41 +01:00
|
|
|
if (efi_soft_reserve_enabled() &&
|
|
|
|
(md->attribute & EFI_MEMORY_SP))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
region_end = min(md->phys_addr + md->num_pages * EFI_PAGE_SIZE - 1,
|
x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR
The EFI stub's kernel placement logic randomizes the physical placement
of the kernel by taking all available memory into account, and picking a
region at random, based on a random seed.
When KASLR is disabled, this seed is set to 0x0, and this results in the
lowest available region of memory to be selected for loading the kernel,
even if this is below LOAD_PHYSICAL_ADDR. Some of this memory is
typically reserved for the GFP_DMA region, to accommodate masters that
can only access the first 16 MiB of system memory.
Even if such devices are rare these days, we may still end up with a
warning in the kernel log, as reported by Tom:
swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0
Fix this by tweaking the random allocation logic to accept a low bound
on the placement, and set it to LOAD_PHYSICAL_ADDR.
Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
Reported-by: Tom Englund <tomenglund26@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218404
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
2024-01-30 19:01:35 +01:00
|
|
|
alloc_max);
|
2021-07-20 21:14:05 +10:00
|
|
|
if (region_end < size)
|
|
|
|
return 0;
|
2020-02-10 17:02:41 +01:00
|
|
|
|
x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR
The EFI stub's kernel placement logic randomizes the physical placement
of the kernel by taking all available memory into account, and picking a
region at random, based on a random seed.
When KASLR is disabled, this seed is set to 0x0, and this results in the
lowest available region of memory to be selected for loading the kernel,
even if this is below LOAD_PHYSICAL_ADDR. Some of this memory is
typically reserved for the GFP_DMA region, to accommodate masters that
can only access the first 16 MiB of system memory.
Even if such devices are rare these days, we may still end up with a
warning in the kernel log, as reported by Tom:
swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0
Fix this by tweaking the random allocation logic to accept a low bound
on the placement, and set it to LOAD_PHYSICAL_ADDR.
Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
Reported-by: Tom Englund <tomenglund26@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218404
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
2024-01-30 19:01:35 +01:00
|
|
|
first_slot = round_up(max(md->phys_addr, alloc_min), align);
|
2020-02-10 17:02:41 +01:00
|
|
|
last_slot = round_down(region_end - size + 1, align);
|
|
|
|
|
|
|
|
if (first_slot > last_slot)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
return ((unsigned long)(last_slot - first_slot) >> align_shift) + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * The UEFI memory descriptors have a virtual address field that is only used
 * when installing the virtual mapping using SetVirtualAddressMap(). Since it
 * is unused here, we can reuse it to keep track of each descriptor's slot
 * count.
 */
#define MD_NUM_SLOTS(md)	((md)->virt_addr)
|
|
|
|
|
|
|
|
efi_status_t efi_random_alloc(unsigned long size,
|
|
|
|
unsigned long align,
|
|
|
|
unsigned long *addr,
|
2022-08-02 11:00:16 +02:00
|
|
|
unsigned long random_seed,
|
2023-08-07 18:27:18 +02:00
|
|
|
int memory_type,
|
x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR
The EFI stub's kernel placement logic randomizes the physical placement
of the kernel by taking all available memory into account, and picking a
region at random, based on a random seed.
When KASLR is disabled, this seed is set to 0x0, and this results in the
lowest available region of memory to be selected for loading the kernel,
even if this is below LOAD_PHYSICAL_ADDR. Some of this memory is
typically reserved for the GFP_DMA region, to accommodate masters that
can only access the first 16 MiB of system memory.
Even if such devices are rare these days, we may still end up with a
warning in the kernel log, as reported by Tom:
swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0
Fix this by tweaking the random allocation logic to accept a low bound
on the placement, and set it to LOAD_PHYSICAL_ADDR.
Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
Reported-by: Tom Englund <tomenglund26@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218404
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
2024-01-30 19:01:35 +01:00
|
|
|
unsigned long alloc_min,
|
|
|
|
unsigned long alloc_max)
|
2020-02-10 17:02:41 +01:00
|
|
|
{
|
2024-12-19 15:30:39 +01:00
|
|
|
struct efi_boot_memmap *map __free(efi_pool) = NULL;
|
2022-06-03 15:29:22 +02:00
|
|
|
unsigned long total_slots = 0, target_slot;
|
2022-05-03 14:29:39 +02:00
|
|
|
unsigned long total_mirrored_slots = 0;
|
2020-02-10 17:02:41 +01:00
|
|
|
efi_status_t status;
|
|
|
|
int map_offset;
|
|
|
|
|
2022-09-15 23:20:06 +02:00
|
|
|
status = efi_get_memory_map(&map, false);
|
2020-02-10 17:02:41 +01:00
|
|
|
if (status != EFI_SUCCESS)
|
|
|
|
return status;
|
|
|
|
|
|
|
|
if (align < EFI_ALLOC_ALIGN)
|
|
|
|
align = EFI_ALLOC_ALIGN;
|
|
|
|
|
2025-03-14 12:03:33 +01:00
|
|
|
/* Avoid address 0x0, as it can be mistaken for NULL */
|
|
|
|
if (alloc_min == 0)
|
|
|
|
alloc_min = align;
|
|
|
|
|
2020-04-13 16:01:05 +02:00
|
|
|
size = round_up(size, EFI_ALLOC_ALIGN);
|
|
|
|
|
2020-02-10 17:02:41 +01:00
|
|
|
/* count the suitable slots in each memory map entry */
|
2022-06-03 15:29:22 +02:00
|
|
|
for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) {
|
|
|
|
efi_memory_desc_t *md = (void *)map->map + map_offset;
|
2020-02-10 17:02:41 +01:00
|
|
|
unsigned long slots;
|
|
|
|
|
x86/efistub: Avoid placing the kernel below LOAD_PHYSICAL_ADDR
The EFI stub's kernel placement logic randomizes the physical placement
of the kernel by taking all available memory into account, and picking a
region at random, based on a random seed.
When KASLR is disabled, this seed is set to 0x0, and this results in the
lowest available region of memory to be selected for loading the kernel,
even if this is below LOAD_PHYSICAL_ADDR. Some of this memory is
typically reserved for the GFP_DMA region, to accommodate masters that
can only access the first 16 MiB of system memory.
Even if such devices are rare these days, we may still end up with a
warning in the kernel log, as reported by Tom:
swapper/0: page allocation failure: order:10, mode:0xcc1(GFP_KERNEL|GFP_DMA), nodemask=(null),cpuset=/,mems_allowed=0
Fix this by tweaking the random allocation logic to accept a low bound
on the placement, and set it to LOAD_PHYSICAL_ADDR.
Fixes: a1b87d54f4e4 ("x86/efistub: Avoid legacy decompressor when doing EFI boot")
Reported-by: Tom Englund <tomenglund26@gmail.com>
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218404
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
2024-01-30 19:01:35 +01:00
|
|
|
slots = get_entry_num_slots(md, size, ilog2(align), alloc_min,
|
|
|
|
alloc_max);
|
2020-02-10 17:02:41 +01:00
|
|
|
MD_NUM_SLOTS(md) = slots;
|
|
|
|
total_slots += slots;
|
2022-05-03 14:29:39 +02:00
|
|
|
if (md->attribute & EFI_MEMORY_MORE_RELIABLE)
|
|
|
|
total_mirrored_slots += slots;
|
2020-02-10 17:02:41 +01:00
|
|
|
}
|
|
|
|
|
2022-05-03 14:29:39 +02:00
|
|
|
/* consider only mirrored slots for randomization if any exist */
|
|
|
|
if (total_mirrored_slots > 0)
|
|
|
|
total_slots = total_mirrored_slots;
|
|
|
|
|
2020-02-10 17:02:41 +01:00
|
|
|
/* find a random number between 0 and total_slots */
|
2020-04-13 16:06:56 +02:00
|
|
|
target_slot = (total_slots * (u64)(random_seed & U32_MAX)) >> 32;
|
2020-02-10 17:02:41 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* target_slot is now a value in the range [0, total_slots), and so
|
|
|
|
* it corresponds with exactly one of the suitable slots we recorded
|
|
|
|
* when iterating over the memory map the first time around.
|
|
|
|
*
|
|
|
|
* So iterate over the memory map again, subtracting the number of
|
|
|
|
* slots of each entry at each iteration, until we have found the entry
|
|
|
|
* that covers our chosen slot. Use the residual value of target_slot
|
|
|
|
* to calculate the randomly chosen address, and allocate it directly
|
|
|
|
* using EFI_ALLOCATE_ADDRESS.
|
|
|
|
*/
|
2023-03-08 00:21:34 +01:00
|
|
|
status = EFI_OUT_OF_RESOURCES;
|
2022-06-03 15:29:22 +02:00
|
|
|
for (map_offset = 0; map_offset < map->map_size; map_offset += map->desc_size) {
|
|
|
|
efi_memory_desc_t *md = (void *)map->map + map_offset;
|
2020-02-10 17:02:41 +01:00
|
|
|
efi_physical_addr_t target;
|
|
|
|
unsigned long pages;
|
|
|
|
|
2022-05-03 14:29:39 +02:00
|
|
|
if (total_mirrored_slots > 0 &&
|
|
|
|
!(md->attribute & EFI_MEMORY_MORE_RELIABLE))
|
|
|
|
continue;
|
|
|
|
|
2020-02-10 17:02:41 +01:00
|
|
|
if (target_slot >= MD_NUM_SLOTS(md)) {
|
|
|
|
target_slot -= MD_NUM_SLOTS(md);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2024-03-26 11:15:25 +01:00
|
|
|
target = round_up(max_t(u64, md->phys_addr, alloc_min), align) + target_slot * align;
|
2020-04-13 16:01:05 +02:00
|
|
|
pages = size / EFI_PAGE_SIZE;
|
2020-02-10 17:02:41 +01:00
|
|
|
|
|
|
|
status = efi_bs_call(allocate_pages, EFI_ALLOCATE_ADDRESS,
|
2022-08-02 11:00:16 +02:00
|
|
|
memory_type, pages, &target);
|
2020-02-10 17:02:41 +01:00
|
|
|
if (status == EFI_SUCCESS)
|
|
|
|
*addr = target;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return status;
|
|
|
|
}
|