2018-07-19 13:11:28 +02:00
|
|
|
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include <linux/string.h>
|
2019-02-03 21:35:45 +01:00
|
|
|
#include <linux/elf.h>
|
2023-10-27 14:12:36 +02:00
|
|
|
#include <asm/page-states.h>
|
2020-10-19 11:01:33 +02:00
|
|
|
#include <asm/boot_data.h>
|
2024-03-01 07:03:49 +01:00
|
|
|
#include <asm/extmem.h>
|
2019-02-03 21:37:20 +01:00
|
|
|
#include <asm/sections.h>
|
2022-12-11 08:18:57 +01:00
|
|
|
#include <asm/maccess.h>
|
2020-10-09 17:14:02 +02:00
|
|
|
#include <asm/cpu_mf.h>
|
2018-04-11 11:56:55 +02:00
|
|
|
#include <asm/setup.h>
|
2020-10-06 22:12:39 +02:00
|
|
|
#include <asm/kasan.h>
|
2019-02-03 21:35:45 +01:00
|
|
|
#include <asm/kexec.h>
|
2018-07-25 15:01:11 +02:00
|
|
|
#include <asm/sclp.h>
|
2019-02-03 21:37:20 +01:00
|
|
|
#include <asm/diag.h>
|
2019-04-01 19:11:03 +02:00
|
|
|
#include <asm/uv.h>
|
2022-07-20 08:22:01 +02:00
|
|
|
#include <asm/abs_lowcore.h>
|
2023-02-08 18:11:25 +01:00
|
|
|
#include <asm/physmem_info.h>
|
2022-04-23 21:31:22 +02:00
|
|
|
#include "decompressor.h"
|
2018-07-19 13:11:28 +02:00
|
|
|
#include "boot.h"
|
2021-07-05 19:33:27 +02:00
|
|
|
#include "uv.h"
|
2018-07-19 13:11:28 +02:00
|
|
|
|
2023-12-02 10:57:15 +01:00
|
|
|
struct vm_layout __bootdata_preserved(vm_layout);
|
2022-07-20 08:22:01 +02:00
|
|
|
unsigned long __bootdata_preserved(__abs_lowcore);
|
2022-07-24 15:02:16 +02:00
|
|
|
unsigned long __bootdata_preserved(__memcpy_real_area);
|
2022-12-11 08:18:57 +01:00
|
|
|
pte_t *__bootdata_preserved(memcpy_real_ptep);
|
2020-10-06 22:12:39 +02:00
|
|
|
unsigned long __bootdata_preserved(VMALLOC_START);
|
|
|
|
unsigned long __bootdata_preserved(VMALLOC_END);
|
|
|
|
struct page *__bootdata_preserved(vmemmap);
|
|
|
|
unsigned long __bootdata_preserved(vmemmap_size);
|
|
|
|
unsigned long __bootdata_preserved(MODULES_VADDR);
|
|
|
|
unsigned long __bootdata_preserved(MODULES_END);
|
s390/mm: rework arch_get_mappable_range() callback
As per description in mm/memory_hotplug.c platforms should define
arch_get_mappable_range() that provides maximum possible addressable
physical memory range for which the linear mapping could be created.
The current implementation uses VMEM_MAX_PHYS macro as the maximum
mappable physical address and it is simply a cast to vmemmap. Since
the address is in physical address space the natural upper limit of
MAX_PHYSMEM_BITS is honoured:
vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS);
Further, to make sure the identity mapping would not overlay with
vmemmap, the size of identity mapping could be stripped like this:
ident_map_size = min(ident_map_size, vmemmap_start);
Similarly, any other memory that could be added (e.g. DCSS segment)
should not overlay with vmemmap as well and that is prevented by
using vmemmap (VMEM_MAX_PHYS macro) as the upper limit.
However, while the use of VMEM_MAX_PHYS brings the desired result
it actually poses two issues:
1. As described, vmemmap is handled as a physical address, although
it is actually a pointer to struct page in virtual address space.
2. As vmemmap is a virtual address it could have been located
anywhere in the virtual address space. However, the desired
necessity to honour MAX_PHYSMEM_BITS limit prevents that.
Rework arch_get_mappable_range() callback in a way it does not
use VMEM_MAX_PHYS macro and does not confuse the notion of virtual
vs physical address spaces as a result. That paves the way for moving
vmemmap elsewhere and optimizing the virtual address space layout.
Introduce max_mappable preserved boot variable and let function
setup_kernel_memory_layout() set it up. As a result, the rest of the
code does not need to know the virtual memory layout specifics.
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2023-07-02 23:11:18 +02:00
|
|
|
unsigned long __bootdata_preserved(max_mappable);
|
2018-04-10 14:14:02 +02:00
|
|
|
|
2021-05-05 22:01:10 +02:00
|
|
|
u64 __bootdata_preserved(stfle_fac_list[16]);
|
2021-06-15 14:25:41 +02:00
|
|
|
struct oldmem_data __bootdata_preserved(oldmem_data);
|
2021-05-05 22:01:10 +02:00
|
|
|
|
2022-12-04 21:15:41 +01:00
|
|
|
struct machine_info machine;
|
|
|
|
|
2018-07-25 15:01:11 +02:00
|
|
|
/*
 * Report a fatal boot error and stop.
 *
 * @x: message text; it is printed through sclp_early_printk() framed by
 *     blank lines and followed by a " -- System halted" marker.
 *
 * Finishes by calling disabled_wait().
 * NOTE(review): presumably disabled_wait() never returns - confirm against
 * its definition.
 */
void error(char *x)
{
	sclp_early_printk("\n\n");
	sclp_early_printk(x);
	sclp_early_printk("\n\n -- System halted");

	disabled_wait();
}
|
|
|
|
|
2022-12-04 21:15:41 +01:00
|
|
|
static void detect_facilities(void)
|
|
|
|
{
|
|
|
|
if (test_facility(8)) {
|
|
|
|
machine.has_edat1 = 1;
|
2023-09-11 21:40:13 +02:00
|
|
|
local_ctl_set_bit(0, CR0_EDAT_BIT);
|
2022-12-04 21:15:41 +01:00
|
|
|
}
|
|
|
|
if (test_facility(78))
|
|
|
|
machine.has_edat2 = 1;
|
2023-08-25 14:29:48 +02:00
|
|
|
if (test_facility(130))
|
2022-12-04 21:15:41 +01:00
|
|
|
machine.has_nx = 1;
|
|
|
|
}
|
|
|
|
|
2023-10-27 14:12:36 +02:00
|
|
|
static int cmma_test_essa(void)
|
|
|
|
{
|
|
|
|
unsigned long reg1, reg2, tmp = 0;
|
|
|
|
int rc = 1;
|
|
|
|
psw_t old;
|
|
|
|
|
|
|
|
/* Test ESSA_GET_STATE */
|
|
|
|
asm volatile(
|
|
|
|
" mvc 0(16,%[psw_old]),0(%[psw_pgm])\n"
|
|
|
|
" epsw %[reg1],%[reg2]\n"
|
|
|
|
" st %[reg1],0(%[psw_pgm])\n"
|
|
|
|
" st %[reg2],4(%[psw_pgm])\n"
|
|
|
|
" larl %[reg1],1f\n"
|
|
|
|
" stg %[reg1],8(%[psw_pgm])\n"
|
|
|
|
" .insn rrf,0xb9ab0000,%[tmp],%[tmp],%[cmd],0\n"
|
|
|
|
" la %[rc],0\n"
|
|
|
|
"1: mvc 0(16,%[psw_pgm]),0(%[psw_old])\n"
|
|
|
|
: [reg1] "=&d" (reg1),
|
|
|
|
[reg2] "=&a" (reg2),
|
|
|
|
[rc] "+&d" (rc),
|
|
|
|
[tmp] "=&d" (tmp),
|
|
|
|
"+Q" (S390_lowcore.program_new_psw),
|
|
|
|
"=Q" (old)
|
|
|
|
: [psw_old] "a" (&old),
|
|
|
|
[psw_pgm] "a" (&S390_lowcore.program_new_psw),
|
|
|
|
[cmd] "i" (ESSA_GET_STATE)
|
|
|
|
: "cc", "memory");
|
|
|
|
return rc;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void cmma_init(void)
|
|
|
|
{
|
|
|
|
if (!cmma_flag)
|
|
|
|
return;
|
|
|
|
if (cmma_test_essa()) {
|
|
|
|
cmma_flag = 0;
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (test_facility(147))
|
|
|
|
cmma_flag = 2;
|
|
|
|
}
|
|
|
|
|
2020-10-09 17:14:02 +02:00
|
|
|
static void setup_lpp(void)
|
|
|
|
{
|
|
|
|
S390_lowcore.current_pid = 0;
|
|
|
|
S390_lowcore.lpp = LPP_MAGIC;
|
|
|
|
if (test_facility(40))
|
|
|
|
lpp(&S390_lowcore.lpp);
|
|
|
|
}
|
|
|
|
|
2018-04-11 11:56:55 +02:00
|
|
|
#ifdef CONFIG_KERNEL_UNCOMPRESSED
|
2024-03-22 14:39:57 +01:00
|
|
|
static unsigned long mem_safe_offset(void)
|
2018-04-11 11:56:55 +02:00
|
|
|
{
|
2024-03-22 14:39:57 +01:00
|
|
|
return (unsigned long)_compressed_start;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void deploy_kernel(void *output)
|
|
|
|
{
|
|
|
|
void *uncompressed_start = (void *)_compressed_start;
|
|
|
|
|
|
|
|
if (output == uncompressed_start)
|
|
|
|
return;
|
|
|
|
memmove(output, uncompressed_start, vmlinux.image_size);
|
|
|
|
memset(uncompressed_start, 0, vmlinux.image_size);
|
2018-04-11 11:56:55 +02:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2023-02-02 13:59:36 +01:00
|
|
|
static void rescue_initrd(unsigned long min, unsigned long max)
|
2018-04-11 11:56:55 +02:00
|
|
|
{
|
2023-02-02 13:59:36 +01:00
|
|
|
unsigned long old_addr, addr, size;
|
|
|
|
|
2018-04-11 11:56:55 +02:00
|
|
|
if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD))
|
2023-02-02 13:59:36 +01:00
|
|
|
return;
|
|
|
|
if (!get_physmem_reserved(RR_INITRD, &addr, &size))
|
|
|
|
return;
|
|
|
|
if (addr >= min && addr + size <= max)
|
|
|
|
return;
|
|
|
|
old_addr = addr;
|
|
|
|
physmem_free(RR_INITRD);
|
|
|
|
addr = physmem_alloc_top_down(RR_INITRD, size, 0);
|
|
|
|
memmove((void *)addr, (void *)old_addr, size);
|
2018-04-11 11:56:55 +02:00
|
|
|
}
|
|
|
|
|
2018-04-10 14:14:02 +02:00
|
|
|
static void copy_bootdata(void)
|
|
|
|
{
|
|
|
|
if (__boot_data_end - __boot_data_start != vmlinux.bootdata_size)
|
|
|
|
error(".boot.data section size mismatch");
|
|
|
|
memcpy((void *)vmlinux.bootdata_off, __boot_data_start, vmlinux.bootdata_size);
|
2019-04-01 19:10:45 +02:00
|
|
|
if (__boot_data_preserved_end - __boot_data_preserved_start != vmlinux.bootdata_preserved_size)
|
|
|
|
error(".boot.preserved.data section size mismatch");
|
|
|
|
memcpy((void *)vmlinux.bootdata_preserved_off, __boot_data_preserved_start, vmlinux.bootdata_preserved_size);
|
2018-04-10 14:14:02 +02:00
|
|
|
}
|
|
|
|
|
2024-02-20 14:35:43 +01:00
|
|
|
static void kaslr_adjust_relocs(unsigned long min_addr, unsigned long max_addr,
|
|
|
|
unsigned long offset, unsigned long phys_offset)
|
s390: compile relocatable kernel without -fPIE
On s390, currently kernel uses the '-fPIE' compiler flag for compiling
vmlinux. This has a few problems:
- It uses dynamic symbols (.dynsym), for which the linker refuses to
allow more than 64k sections. This can break features which use
'-ffunction-sections' and '-fdata-sections', including kpatch-build
[1] and Function Granular KASLR.
- It unnecessarily uses GOT relocations, adding an extra layer of
indirection for many memory accesses.
Instead of using '-fPIE', resolve all the relocations at link time and
then manually adjust any absolute relocations (R_390_64) during boot.
This is done by first telling the linker to preserve all relocations
during the vmlinux link. (Note this is harmless: they are later
stripped in the vmlinux.bin link.)
Then use the 'relocs' tool to find all absolute relocations (R_390_64)
which apply to allocatable sections. The offsets of those relocations
are saved in a special section which is then used to adjust the
relocations during boot.
(Note: For some reason, Clang occasionally creates a GOT reference, even
without '-fPIE'. So Clang-compiled kernels have a GOT, which needs to
be adjusted.)
On my mostly-defconfig kernel, this reduces kernel text size by ~1.3%.
[1] https://github.com/dynup/kpatch/issues/1284
[2] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622872.html
[3] https://gcc.gnu.org/pipermail/gcc-patches/2023-August/625986.html
Compiler consideration:
Gcc recently implemented an optimization [2] for loading symbols without
explicit alignment, aligning with the IBM Z ELF ABI. This ABI mandates
symbols to reside on a 2-byte boundary, enabling the use of the larl
instruction. However, kernel linker scripts may still generate unaligned
symbols. To address this, a new -munaligned-symbols option has been
introduced [3] in recent gcc versions. This option has to be used with
future gcc versions.
Older Clang lacks support for handling unaligned symbols generated
by kernel linker scripts when the kernel is built without -fPIE. However,
future versions of Clang will include support for the -munaligned-symbols
option. When the support is unavailable, compile the kernel with -fPIE
to maintain the existing behavior.
In addition to it:
move vmlinux.relocs to safe relocation
When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
uncompressed vmlinux.bin is positioned in the bzImage decompressor
image at the default kernel LMA of 0x100000, enabling it to be executed
in-place. However, the size of .vmlinux.relocs could be large enough to
cause an overlap with the uncompressed kernel at the address 0x100000.
To address this issue, .vmlinux.relocs is positioned after the
.rodata.compressed in the bzImage. Nevertheless, in this configuration,
vmlinux.relocs will overlap with the .bss section of vmlinux.bin. To
overcome that, move vmlinux.relocs to a safe location before clearing
.bss and handling relocs.
Compile warning fix from Sumanth Korikkar:
When kernel is built with CONFIG_LD_ORPHAN_WARN and -fno-PIE, there are
several warnings:
ld: warning: orphan section `.rela.iplt' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.head.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.init.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.rodata.cst8' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
Orphan sections are sections that exist in an object file but don't have
a corresponding output section in the final executable. ld raises a
warning when it identifies such sections.
Eliminate the warning by placing all .rela orphan sections in .rela.dyn
and raise an error when size of .rela.dyn is greater than zero. i.e.
Dont just neglect orphan sections.
This is similar to adjustment performed in x86, where kernel is built
with -fno-PIE.
commit 5354e84598f2 ("x86/build: Add asserts for unwanted sections")
[sumanthk@linux.ibm.com: rebased Josh Poimboeuf patches and move
vmlinux.relocs to safe location]
[hca@linux.ibm.com: merged compile warning fix from Sumanth]
Tested-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/r/20240219132734.22881-4-sumanthk@linux.ibm.com
Link: https://lore.kernel.org/r/20240219132734.22881-5-sumanthk@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2024-02-19 14:27:33 +01:00
|
|
|
{
|
|
|
|
int *reloc;
|
|
|
|
long loc;
|
|
|
|
|
|
|
|
/* Adjust R_390_64 relocations */
|
2024-02-26 11:47:40 +01:00
|
|
|
for (reloc = (int *)__vmlinux_relocs_64_start; reloc < (int *)__vmlinux_relocs_64_end; reloc++) {
|
s390/mm: Uncouple physical vs virtual address spaces
The uncoupling physical vs virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt start
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how partucular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should locate next to each other
to prevent failures and extra reworks in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarily;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
do as little machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address randomized
within a memory window that spans whole unused virtual address
space. The size of that window depends from the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcefully set to 2GB, which
amounts to in 15 bits of entropy if KASAN is enabled or 17
bits of entropy in default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
loc = (long)*reloc + phys_offset;
|
s390: compile relocatable kernel without -fPIE
On s390, currently kernel uses the '-fPIE' compiler flag for compiling
vmlinux. This has a few problems:
- It uses dynamic symbols (.dynsym), for which the linker refuses to
allow more than 64k sections. This can break features which use
'-ffunction-sections' and '-fdata-sections', including kpatch-build
[1] and Function Granular KASLR.
- It unnecessarily uses GOT relocations, adding an extra layer of
indirection for many memory accesses.
Instead of using '-fPIE', resolve all the relocations at link time and
then manually adjust any absolute relocations (R_390_64) during boot.
This is done by first telling the linker to preserve all relocations
during the vmlinux link. (Note this is harmless: they are later
stripped in the vmlinux.bin link.)
Then use the 'relocs' tool to find all absolute relocations (R_390_64)
which apply to allocatable sections. The offsets of those relocations
are saved in a special section which is then used to adjust the
relocations during boot.
(Note: For some reason, Clang occasionally creates a GOT reference, even
without '-fPIE'. So Clang-compiled kernels have a GOT, which needs to
be adjusted.)
On my mostly-defconfig kernel, this reduces kernel text size by ~1.3%.
[1] https://github.com/dynup/kpatch/issues/1284
[2] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622872.html
[3] https://gcc.gnu.org/pipermail/gcc-patches/2023-August/625986.html
Compiler consideration:
Gcc recently implemented an optimization [2] for loading symbols without
explicit alignment, aligning with the IBM Z ELF ABI. This ABI mandates
symbols to reside on a 2-byte boundary, enabling the use of the larl
instruction. However, kernel linker scripts may still generate unaligned
symbols. To address this, a new -munaligned-symbols option has been
introduced [3] in recent gcc versions. This option has to be used with
future gcc versions.
Older Clang lacks support for handling unaligned symbols generated
by kernel linker scripts when the kernel is built without -fPIE. However,
future versions of Clang will include support for the -munaligned-symbols
option. When the support is unavailable, compile the kernel with -fPIE
to maintain the existing behavior.
In addition to it:
move vmlinux.relocs to safe relocation
When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
uncompressed vmlinux.bin is positioned in the bzImage decompressor
image at the default kernel LMA of 0x100000, enabling it to be executed
in-place. However, the size of .vmlinux.relocs could be large enough to
cause an overlap with the uncompressed kernel at the address 0x100000.
To address this issue, .vmlinux.relocs is positioned after the
.rodata.compressed in the bzImage. Nevertheless, in this configuration,
vmlinux.relocs will overlap with the .bss section of vmlinux.bin. To
overcome that, move vmlinux.relocs to a safe location before clearing
.bss and handling relocs.
Compile warning fix from Sumanth Korikkar:
When kernel is built with CONFIG_LD_ORPHAN_WARN and -fno-PIE, there are
several warnings:
ld: warning: orphan section `.rela.iplt' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.head.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.init.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.rodata.cst8' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
Orphan sections are sections that exist in an object file but don't have
a corresponding output section in the final executable. ld raises a
warning when it identifies such sections.
Eliminate the warning by placing all .rela orphan sections in .rela.dyn
and raise an error when size of .rela.dyn is greater than zero. i.e.
Dont just neglect orphan sections.
This is similar to adjustment performed in x86, where kernel is built
with -fno-PIE.
commit 5354e84598f2 ("x86/build: Add asserts for unwanted sections")
[sumanthk@linux.ibm.com: rebased Josh Poimboeuf patches and move
vmlinux.relocs to safe location]
[hca@linux.ibm.com: merged compile warning fix from Sumanth]
Tested-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/r/20240219132734.22881-4-sumanthk@linux.ibm.com
Link: https://lore.kernel.org/r/20240219132734.22881-5-sumanthk@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2024-02-19 14:27:33 +01:00
|
|
|
if (loc < min_addr || loc > max_addr)
|
|
|
|
error("64-bit relocation outside of kernel!\n");
|
2024-03-22 14:39:57 +01:00
|
|
|
*(u64 *)loc += offset - __START_KERNEL;
|
s390: compile relocatable kernel without -fPIE
On s390, currently kernel uses the '-fPIE' compiler flag for compiling
vmlinux. This has a few problems:
- It uses dynamic symbols (.dynsym), for which the linker refuses to
allow more than 64k sections. This can break features which use
'-ffunction-sections' and '-fdata-sections', including kpatch-build
[1] and Function Granular KASLR.
- It unnecessarily uses GOT relocations, adding an extra layer of
indirection for many memory accesses.
Instead of using '-fPIE', resolve all the relocations at link time and
then manually adjust any absolute relocations (R_390_64) during boot.
This is done by first telling the linker to preserve all relocations
during the vmlinux link. (Note this is harmless: they are later
stripped in the vmlinux.bin link.)
Then use the 'relocs' tool to find all absolute relocations (R_390_64)
which apply to allocatable sections. The offsets of those relocations
are saved in a special section which is then used to adjust the
relocations during boot.
(Note: For some reason, Clang occasionally creates a GOT reference, even
without '-fPIE'. So Clang-compiled kernels have a GOT, which needs to
be adjusted.)
On my mostly-defconfig kernel, this reduces kernel text size by ~1.3%.
[1] https://github.com/dynup/kpatch/issues/1284
[2] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622872.html
[3] https://gcc.gnu.org/pipermail/gcc-patches/2023-August/625986.html
Compiler consideration:
Gcc recently implemented an optimization [2] for loading symbols without
explicit alignment, aligning with the IBM Z ELF ABI. This ABI mandates
symbols to reside on a 2-byte boundary, enabling the use of the larl
instruction. However, kernel linker scripts may still generate unaligned
symbols. To address this, a new -munaligned-symbols option has been
introduced [3] in recent gcc versions. This option has to be used with
future gcc versions.
Older Clang lacks support for handling unaligned symbols generated
by kernel linker scripts when the kernel is built without -fPIE. However,
future versions of Clang will include support for the -munaligned-symbols
option. When the support is unavailable, compile the kernel with -fPIE
to maintain the existing behavior.
In addition to it:
move vmlinux.relocs to safe relocation
When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
uncompressed vmlinux.bin is positioned in the bzImage decompressor
image at the default kernel LMA of 0x100000, enabling it to be executed
in-place. However, the size of .vmlinux.relocs could be large enough to
cause an overlap with the uncompressed kernel at the address 0x100000.
To address this issue, .vmlinux.relocs is positioned after the
.rodata.compressed in the bzImage. Nevertheless, in this configuration,
vmlinux.relocs will overlap with the .bss section of vmlinux.bin. To
overcome that, move vmlinux.relocs to a safe location before clearing
.bss and handling relocs.
Compile warning fix from Sumanth Korikkar:
When kernel is built with CONFIG_LD_ORPHAN_WARN and -fno-PIE, there are
several warnings:
ld: warning: orphan section `.rela.iplt' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.head.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.init.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.rodata.cst8' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
Orphan sections are sections that exist in an object file but don't have
a corresponding output section in the final executable. ld raises a
warning when it identifies such sections.
Eliminate the warning by placing all .rela orphan sections in .rela.dyn
and raise an error when size of .rela.dyn is greater than zero. i.e.
Dont just neglect orphan sections.
This is similar to adjustment performed in x86, where kernel is built
with -fno-PIE.
commit 5354e84598f2 ("x86/build: Add asserts for unwanted sections")
[sumanthk@linux.ibm.com: rebased Josh Poimboeuf patches and move
vmlinux.relocs to safe location]
[hca@linux.ibm.com: merged compile warning fix from Sumanth]
Tested-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/r/20240219132734.22881-4-sumanthk@linux.ibm.com
Link: https://lore.kernel.org/r/20240219132734.22881-5-sumanthk@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2024-02-19 14:27:33 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void kaslr_adjust_got(unsigned long offset)
|
|
|
|
{
|
|
|
|
u64 *entry;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Even without -fPIE, Clang still uses a global offset table for some
|
2024-02-21 14:46:53 +01:00
|
|
|
* reason. Adjust the GOT entries.
|
s390: compile relocatable kernel without -fPIE
On s390, currently kernel uses the '-fPIE' compiler flag for compiling
vmlinux. This has a few problems:
- It uses dynamic symbols (.dynsym), for which the linker refuses to
allow more than 64k sections. This can break features which use
'-ffunction-sections' and '-fdata-sections', including kpatch-build
[1] and Function Granular KASLR.
- It unnecessarily uses GOT relocations, adding an extra layer of
indirection for many memory accesses.
Instead of using '-fPIE', resolve all the relocations at link time and
then manually adjust any absolute relocations (R_390_64) during boot.
This is done by first telling the linker to preserve all relocations
during the vmlinux link. (Note this is harmless: they are later
stripped in the vmlinux.bin link.)
Then use the 'relocs' tool to find all absolute relocations (R_390_64)
which apply to allocatable sections. The offsets of those relocations
are saved in a special section which is then used to adjust the
relocations during boot.
(Note: For some reason, Clang occasionally creates a GOT reference, even
without '-fPIE'. So Clang-compiled kernels have a GOT, which needs to
be adjusted.)
On my mostly-defconfig kernel, this reduces kernel text size by ~1.3%.
[1] https://github.com/dynup/kpatch/issues/1284
[2] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622872.html
[3] https://gcc.gnu.org/pipermail/gcc-patches/2023-August/625986.html
Compiler consideration:
Gcc recently implemented an optimization [2] for loading symbols without
explicit alignment, aligning with the IBM Z ELF ABI. This ABI mandates
symbols to reside on a 2-byte boundary, enabling the use of the larl
instruction. However, kernel linker scripts may still generate unaligned
symbols. To address this, a new -munaligned-symbols option has been
introduced [3] in recent gcc versions. This option has to be used with
future gcc versions.
Older Clang lacks support for handling unaligned symbols generated
by kernel linker scripts when the kernel is built without -fPIE. However,
future versions of Clang will include support for the -munaligned-symbols
option. When the support is unavailable, compile the kernel with -fPIE
to maintain the existing behavior.
In addition to it:
move vmlinux.relocs to safe relocation
When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
uncompressed vmlinux.bin is positioned in the bzImage decompressor
image at the default kernel LMA of 0x100000, enabling it to be executed
in-place. However, the size of .vmlinux.relocs could be large enough to
cause an overlap with the uncompressed kernel at the address 0x100000.
To address this issue, .vmlinux.relocs is positioned after the
.rodata.compressed in the bzImage. Nevertheless, in this configuration,
vmlinux.relocs will overlap with the .bss section of vmlinux.bin. To
overcome that, move vmlinux.relocs to a safe location before clearing
.bss and handling relocs.
Compile warning fix from Sumanth Korikkar:
When kernel is built with CONFIG_LD_ORPHAN_WARN and -fno-PIE, there are
several warnings:
ld: warning: orphan section `.rela.iplt' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.head.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.init.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.rodata.cst8' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
Orphan sections are sections that exist in an object file but don't have
a corresponding output section in the final executable. ld raises a
warning when it identifies such sections.
Eliminate the warning by placing all .rela orphan sections in .rela.dyn
and raise an error when size of .rela.dyn is greater than zero. i.e.
Dont just neglect orphan sections.
This is similar to adjustment performed in x86, where kernel is built
with -fno-PIE.
commit 5354e84598f2 ("x86/build: Add asserts for unwanted sections")
[sumanthk@linux.ibm.com: rebased Josh Poimboeuf patches and move
vmlinux.relocs to safe location]
[hca@linux.ibm.com: merged compile warning fix from Sumanth]
Tested-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/r/20240219132734.22881-4-sumanthk@linux.ibm.com
Link: https://lore.kernel.org/r/20240219132734.22881-5-sumanthk@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2024-02-19 14:27:33 +01:00
|
|
|
*/
|
2024-02-21 11:51:55 +01:00
|
|
|
for (entry = (u64 *)vmlinux.got_start; entry < (u64 *)vmlinux.got_end; entry++)
|
2024-03-22 14:39:57 +01:00
|
|
|
*entry += offset - __START_KERNEL;
|
s390: compile relocatable kernel without -fPIE
On s390, currently kernel uses the '-fPIE' compiler flag for compiling
vmlinux. This has a few problems:
- It uses dynamic symbols (.dynsym), for which the linker refuses to
allow more than 64k sections. This can break features which use
'-ffunction-sections' and '-fdata-sections', including kpatch-build
[1] and Function Granular KASLR.
- It unnecessarily uses GOT relocations, adding an extra layer of
indirection for many memory accesses.
Instead of using '-fPIE', resolve all the relocations at link time and
then manually adjust any absolute relocations (R_390_64) during boot.
This is done by first telling the linker to preserve all relocations
during the vmlinux link. (Note this is harmless: they are later
stripped in the vmlinux.bin link.)
Then use the 'relocs' tool to find all absolute relocations (R_390_64)
which apply to allocatable sections. The offsets of those relocations
are saved in a special section which is then used to adjust the
relocations during boot.
(Note: For some reason, Clang occasionally creates a GOT reference, even
without '-fPIE'. So Clang-compiled kernels have a GOT, which needs to
be adjusted.)
On my mostly-defconfig kernel, this reduces kernel text size by ~1.3%.
[1] https://github.com/dynup/kpatch/issues/1284
[2] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622872.html
[3] https://gcc.gnu.org/pipermail/gcc-patches/2023-August/625986.html
Compiler consideration:
Gcc recently implemented an optimization [2] for loading symbols without
explicit alignment, aligning with the IBM Z ELF ABI. This ABI mandates
symbols to reside on a 2-byte boundary, enabling the use of the larl
instruction. However, kernel linker scripts may still generate unaligned
symbols. To address this, a new -munaligned-symbols option has been
introduced [3] in recent gcc versions. This option has to be used with
future gcc versions.
Older Clang lacks support for handling unaligned symbols generated
by kernel linker scripts when the kernel is built without -fPIE. However,
future versions of Clang will include support for the -munaligned-symbols
option. When the support is unavailable, compile the kernel with -fPIE
to maintain the existing behavior.
In addition to it:
move vmlinux.relocs to safe relocation
When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
uncompressed vmlinux.bin is positioned in the bzImage decompressor
image at the default kernel LMA of 0x100000, enabling it to be executed
in-place. However, the size of .vmlinux.relocs could be large enough to
cause an overlap with the uncompressed kernel at the address 0x100000.
To address this issue, .vmlinux.relocs is positioned after the
.rodata.compressed in the bzImage. Nevertheless, in this configuration,
vmlinux.relocs will overlap with the .bss section of vmlinux.bin. To
overcome that, move vmlinux.relocs to a safe location before clearing
.bss and handling relocs.
Compile warning fix from Sumanth Korikkar:
When kernel is built with CONFIG_LD_ORPHAN_WARN and -fno-PIE, there are
several warnings:
ld: warning: orphan section `.rela.iplt' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.head.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.init.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.rodata.cst8' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
Orphan sections are sections that exist in an object file but don't have
a corresponding output section in the final executable. ld raises a
warning when it identifies such sections.
Eliminate the warning by placing all .rela orphan sections in .rela.dyn
and raise an error when size of .rela.dyn is greater than zero. i.e.
Dont just neglect orphan sections.
This is similar to adjustment performed in x86, where kernel is built
with -fno-PIE.
commit 5354e84598f2 ("x86/build: Add asserts for unwanted sections")
[sumanthk@linux.ibm.com: rebased Josh Poimboeuf patches and move
vmlinux.relocs to safe location]
[hca@linux.ibm.com: merged compile warning fix from Sumanth]
Tested-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/r/20240219132734.22881-4-sumanthk@linux.ibm.com
Link: https://lore.kernel.org/r/20240219132734.22881-5-sumanthk@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2024-02-19 14:27:33 +01:00
|
|
|
}
|
|
|
|
|
2020-10-19 11:01:33 +02:00
|
|
|
/*
|
|
|
|
* Merge information from several sources into a single ident_map_size value.
|
|
|
|
* "ident_map_size" represents the upper limit of physical memory we may ever
|
|
|
|
* reach. It might not be all online memory, but also include standby (offline)
|
|
|
|
* memory. "ident_map_size" could be lower then actual standby or even online
|
|
|
|
* memory present, due to limiting factors. We should never go above this limit.
|
|
|
|
* It is the size of our identity mapping.
|
|
|
|
*
|
|
|
|
* Consider the following factors:
|
|
|
|
* 1. max_physmem_end - end of physical memory online or standby.
|
2023-02-08 18:11:25 +01:00
|
|
|
* Always >= end of the last online memory range (get_physmem_online_end()).
|
2020-10-19 11:01:33 +02:00
|
|
|
* 2. CONFIG_MAX_PHYSMEM_BITS - the maximum size of physical memory the
|
|
|
|
* kernel is able to support.
|
|
|
|
* 3. "mem=" kernel command line option which limits physical memory usage.
|
|
|
|
* 4. OLDMEM_BASE which is a kdump memory limit when the kernel is executed as
|
|
|
|
* crash kernel.
|
|
|
|
* 5. "hsa" size which is a memory limit when the kernel is executed during
|
|
|
|
* zfcp/nvme dump.
|
|
|
|
*/
|
|
|
|
static void setup_ident_map_size(unsigned long max_physmem_end)
|
|
|
|
{
|
|
|
|
unsigned long hsa_size;
|
|
|
|
|
|
|
|
ident_map_size = max_physmem_end;
|
|
|
|
if (memory_limit)
|
|
|
|
ident_map_size = min(ident_map_size, memory_limit);
|
|
|
|
ident_map_size = min(ident_map_size, 1UL << MAX_PHYSMEM_BITS);
|
|
|
|
|
|
|
|
#ifdef CONFIG_CRASH_DUMP
|
2021-06-15 14:25:41 +02:00
|
|
|
if (oldmem_data.start) {
|
2023-03-31 15:03:22 +02:00
|
|
|
__kaslr_enabled = 0;
|
2021-06-15 14:25:41 +02:00
|
|
|
ident_map_size = min(ident_map_size, oldmem_data.size);
|
2020-10-19 11:01:33 +02:00
|
|
|
} else if (ipl_block_valid && is_ipl_block_dump()) {
|
2023-03-31 15:03:22 +02:00
|
|
|
__kaslr_enabled = 0;
|
2020-10-19 11:01:33 +02:00
|
|
|
if (!sclp_early_get_hsa_size(&hsa_size) && hsa_size)
|
|
|
|
ident_map_size = min(ident_map_size, hsa_size);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
s390/mm: Uncouple physical vs virtual address spaces
The uncoupling physical vs virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how a particular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should be located next to each other
to prevent failures and extra rework in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarly;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
do as little machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address is randomized
within a memory window that spans the whole unused virtual address
space. The size of that window depends on the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by the vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcibly set to 2GB, which
amounts to 15 bits of entropy if KASAN is enabled or 17
bits of entropy in the default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
/*
 * Sum of MEMCPY_REAL_SIZE and ABS_LOWCORE_MAP_SIZE, rounded up to a
 * multiple of sizeof(struct lowcore).
 */
#define FIXMAP_SIZE round_up(MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore))
|
|
|
|
|
|
|
|
static unsigned long get_vmem_size(unsigned long identity_size,
|
|
|
|
unsigned long vmemmap_size,
|
|
|
|
unsigned long vmalloc_size,
|
|
|
|
unsigned long rte_size)
|
|
|
|
{
|
|
|
|
unsigned long max_mappable, vsize;
|
|
|
|
|
|
|
|
max_mappable = max(identity_size, MAX_DCSS_ADDR);
|
|
|
|
vsize = round_up(SZ_2G + max_mappable, rte_size) +
|
|
|
|
round_up(vmemmap_size, rte_size) +
|
|
|
|
FIXMAP_SIZE + MODULES_LEN + KASLR_LEN;
|
|
|
|
return size_add(vsize, vmalloc_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
static unsigned long setup_kernel_memory_layout(unsigned long kernel_size)
|
2020-10-06 22:12:39 +02:00
|
|
|
{
|
2021-10-14 13:53:54 +02:00
|
|
|
unsigned long vmemmap_start;
|
2023-09-26 15:58:51 +02:00
|
|
|
unsigned long kernel_start;
|
2022-12-13 11:35:11 +01:00
|
|
|
unsigned long asce_limit;
|
2020-10-06 22:12:39 +02:00
|
|
|
unsigned long rte_size;
|
|
|
|
unsigned long pages;
|
2023-07-06 12:28:17 +02:00
|
|
|
unsigned long vsize;
|
2022-05-26 07:57:36 +02:00
|
|
|
unsigned long vmax;
|
2020-10-06 22:12:39 +02:00
|
|
|
|
|
|
|
pages = ident_map_size / PAGE_SIZE;
|
|
|
|
/* vmemmap contains a multiple of PAGES_PER_SECTION struct pages */
|
|
|
|
vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page);
|
|
|
|
|
|
|
|
/* choose kernel address space layout: 4 or 3 levels. */
|
2024-03-22 14:39:57 +01:00
|
|
|
BUILD_BUG_ON(!IS_ALIGNED(__START_KERNEL, THREAD_SIZE));
|
2023-09-26 15:58:51 +02:00
|
|
|
BUILD_BUG_ON(!IS_ALIGNED(__NO_KASLR_START_KERNEL, THREAD_SIZE));
|
|
|
|
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > _REGION1_SIZE);
|
s390/mm: Uncouple physical vs virtual address spaces
The uncoupling physical vs virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how a particular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should be located next to each other
to prevent failures and extra rework in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarly;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
do as little machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address is randomized
within a memory window that spans the whole unused virtual address
space. The size of that window depends on the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by the vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcibly set to 2GB, which
amounts to 15 bits of entropy if KASAN is enabled or 17
bits of entropy in the default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION3_SIZE);
|
2023-09-26 15:58:51 +02:00
|
|
|
if (IS_ENABLED(CONFIG_KASAN) || __NO_KASLR_END_KERNEL > _REGION2_SIZE ||
|
|
|
|
(vsize > _REGION2_SIZE && kaslr_enabled())) {
|
2022-12-13 11:35:11 +01:00
|
|
|
asce_limit = _REGION1_SIZE;
|
2023-09-26 15:58:51 +02:00
|
|
|
if (__NO_KASLR_END_KERNEL > _REGION2_SIZE) {
|
|
|
|
rte_size = _REGION2_SIZE;
|
|
|
|
vsize = get_vmem_size(ident_map_size, vmemmap_size, vmalloc_size, _REGION2_SIZE);
|
|
|
|
} else {
|
|
|
|
rte_size = _REGION3_SIZE;
|
|
|
|
}
|
2020-10-06 22:12:39 +02:00
|
|
|
} else {
|
2022-12-13 11:35:11 +01:00
|
|
|
asce_limit = _REGION2_SIZE;
|
2021-10-14 13:53:54 +02:00
|
|
|
rte_size = _REGION3_SIZE;
|
2020-10-06 22:12:39 +02:00
|
|
|
}
|
2023-08-05 10:59:09 +02:00
|
|
|
|
2021-10-14 13:53:54 +02:00
|
|
|
/*
|
2023-08-05 10:59:09 +02:00
|
|
|
* Forcing modules and vmalloc area under the ultravisor
|
2021-10-14 13:53:54 +02:00
|
|
|
* secure storage limit, so that any vmalloc allocation
|
|
|
|
* we do could be used to back secure guest storage.
|
2023-09-26 15:58:51 +02:00
|
|
|
*
|
|
|
|
* Assume the secure storage limit always exceeds _REGION2_SIZE,
|
|
|
|
* otherwise asce_limit and rte_size would have been adjusted.
|
2021-10-14 13:53:54 +02:00
|
|
|
*/
|
2022-12-13 11:35:11 +01:00
|
|
|
vmax = adjust_to_uv_max(asce_limit);
|
2021-10-14 13:53:54 +02:00
|
|
|
#ifdef CONFIG_KASAN
|
2023-09-26 15:58:51 +02:00
|
|
|
BUILD_BUG_ON(__NO_KASLR_END_KERNEL > KASAN_SHADOW_START);
|
2021-10-14 13:53:54 +02:00
|
|
|
/* force vmalloc and modules below kasan shadow */
|
2022-05-26 07:57:36 +02:00
|
|
|
vmax = min(vmax, KASAN_SHADOW_START);
|
2020-10-06 22:12:39 +02:00
|
|
|
#endif
|
2023-09-26 15:58:51 +02:00
|
|
|
vsize = min(vsize, vmax);
|
s390/mm: Uncouple physical vs virtual address spaces
The uncoupling physical vs virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how a particular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should be located next to each other
to prevent failures and extra rework in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarly;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
do as little machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address is randomized
within a memory window that spans the whole unused virtual address
space. The size of that window depends on the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by the vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcibly set to 2GB, which
amounts to 15 bits of entropy if KASAN is enabled or 17
bits of entropy in the default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
if (kaslr_enabled()) {
|
2023-09-26 15:58:51 +02:00
|
|
|
unsigned long kernel_end, kaslr_len, slots, pos;
|
s390/mm: Uncouple physical vs virtual address spaces
The uncoupling physical vs virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how a particular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should locate next to each other
to prevent failures and extra reworks in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarly;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
do as little machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address is randomized
within a memory window that spans whole unused virtual address
space. The size of that window depends on the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcefully set to 2GB, which
amounts to 15 bits of entropy if KASAN is enabled or 17
bits of entropy in default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
|
|
|
|
kaslr_len = max(KASLR_LEN, vmax - vsize);
|
|
|
|
slots = DIV_ROUND_UP(kaslr_len - kernel_size, THREAD_SIZE);
|
|
|
|
if (get_random(slots, &pos))
|
|
|
|
pos = 0;
|
2023-09-26 15:58:51 +02:00
|
|
|
kernel_end = vmax - pos * THREAD_SIZE;
|
|
|
|
kernel_start = round_down(kernel_end - kernel_size, THREAD_SIZE);
|
|
|
|
} else if (vmax < __NO_KASLR_END_KERNEL || vsize > __NO_KASLR_END_KERNEL) {
|
|
|
|
kernel_start = round_down(vmax - kernel_size, THREAD_SIZE);
|
|
|
|
decompressor_printk("The kernel base address is forced to %lx\n", kernel_start);
|
|
|
|
} else {
|
|
|
|
kernel_start = __NO_KASLR_START_KERNEL;
|
s390/mm: Uncouple physical vs virtual address spaces
The uncoupling physical vs virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how a particular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should locate next to each other
to prevent failures and extra reworks in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarly;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
do as little machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address is randomized
within a memory window that spans whole unused virtual address
space. The size of that window depends on the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcefully set to 2GB, which
amounts to 15 bits of entropy if KASAN is enabled or 17
bits of entropy in default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
}
|
|
|
|
__kaslr_offset = kernel_start;
|
|
|
|
|
|
|
|
MODULES_END = round_down(kernel_start, _SEGMENT_SIZE);
|
2020-10-06 22:12:39 +02:00
|
|
|
MODULES_VADDR = MODULES_END - MODULES_LEN;
|
|
|
|
VMALLOC_END = MODULES_VADDR;
|
|
|
|
|
2021-10-14 13:53:54 +02:00
|
|
|
/* allow vmalloc area to occupy up to about 1/2 of the rest virtual space left */
|
s390/mm: Uncouple physical vs virtual address spaces
The uncoupling physical vs virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how a particular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should locate next to each other
to prevent failures and extra reworks in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarly;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
do as little machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address is randomized
within a memory window that spans whole unused virtual address
space. The size of that window depends on the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcefully set to 2GB, which
amounts to 15 bits of entropy if KASAN is enabled or 17
bits of entropy in default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
vsize = (VMALLOC_END - FIXMAP_SIZE) / 2;
|
2024-03-01 07:05:39 +01:00
|
|
|
vsize = round_down(vsize, _SEGMENT_SIZE);
|
2023-07-16 10:56:00 +02:00
|
|
|
vmalloc_size = min(vmalloc_size, vsize);
|
2021-10-14 13:53:54 +02:00
|
|
|
VMALLOC_START = VMALLOC_END - vmalloc_size;
|
2020-10-06 22:12:39 +02:00
|
|
|
|
2024-03-01 07:05:39 +01:00
|
|
|
__memcpy_real_area = round_down(VMALLOC_START - MEMCPY_REAL_SIZE, PAGE_SIZE);
|
|
|
|
__abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE,
|
|
|
|
sizeof(struct lowcore));
|
|
|
|
|
2021-10-14 13:53:54 +02:00
|
|
|
/* split remaining virtual space between 1:1 mapping & vmemmap array */
|
2024-03-01 07:05:39 +01:00
|
|
|
pages = __abs_lowcore / (PAGE_SIZE + sizeof(struct page));
|
2020-10-06 22:12:39 +02:00
|
|
|
pages = SECTION_ALIGN_UP(pages);
|
2021-10-14 13:53:54 +02:00
|
|
|
/* keep vmemmap_start aligned to a top level region table entry */
|
2024-03-01 07:05:39 +01:00
|
|
|
vmemmap_start = round_down(__abs_lowcore - pages * sizeof(struct page), rte_size);
|
2021-10-14 13:53:54 +02:00
|
|
|
/* make sure identity map doesn't overlay with vmemmap */
|
|
|
|
ident_map_size = min(ident_map_size, vmemmap_start);
|
2020-10-06 22:12:39 +02:00
|
|
|
vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page);
|
2024-03-01 07:05:39 +01:00
|
|
|
/* make sure vmemmap doesn't overlay with absolute lowcore area */
|
|
|
|
if (vmemmap_start + vmemmap_size > __abs_lowcore) {
|
2023-07-11 07:58:24 +02:00
|
|
|
vmemmap_size = SECTION_ALIGN_DOWN(ident_map_size / PAGE_SIZE) * sizeof(struct page);
|
|
|
|
ident_map_size = vmemmap_size / sizeof(struct page) * PAGE_SIZE;
|
|
|
|
}
|
2021-10-14 13:53:54 +02:00
|
|
|
vmemmap = (struct page *)vmemmap_start;
|
2024-03-01 07:03:49 +01:00
|
|
|
/* maximum address for which linear mapping could be created (DCSS, memory) */
|
|
|
|
BUILD_BUG_ON(MAX_DCSS_ADDR > (1UL << MAX_PHYSMEM_BITS));
|
|
|
|
max_mappable = max(ident_map_size, MAX_DCSS_ADDR);
|
|
|
|
max_mappable = min(max_mappable, vmemmap_start);
|
s390/mm: Uncouple physical vs virtual address spaces
The uncoupling physical vs virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how a particular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should locate next to each other
to prevent failures and extra reworks in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarly;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
do as little machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address is randomized
within a memory window that spans whole unused virtual address
space. The size of that window depends on the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcefully set to 2GB, which
amounts to 15 bits of entropy if KASAN is enabled or 17
bits of entropy in default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
__identity_base = round_down(vmemmap_start - max_mappable, rte_size);
|
2022-12-13 11:35:11 +01:00
|
|
|
|
|
|
|
return asce_limit;
|
2020-10-06 22:12:39 +02:00
|
|
|
}
|
|
|
|
|
2020-09-02 16:52:06 +02:00
|
|
|
/*
|
|
|
|
* This function clears the BSS section of the decompressed Linux kernel and NOT the decompressor's.
|
|
|
|
*/
|
2024-03-22 14:39:57 +01:00
|
|
|
static void clear_bss_section(unsigned long kernel_start)
|
2019-08-11 20:55:18 +02:00
|
|
|
{
|
2024-03-22 14:39:57 +01:00
|
|
|
memset((void *)kernel_start + vmlinux.image_size, 0, vmlinux.bss_size);
|
2019-08-11 20:55:18 +02:00
|
|
|
}
|
|
|
|
|
2020-09-18 12:25:37 +02:00
|
|
|
/*
|
|
|
|
* Set vmalloc area size to an 8th of (potential) physical memory
|
|
|
|
* size, unless size has been set by kernel command line parameter.
|
|
|
|
*/
|
|
|
|
static void setup_vmalloc_size(void)
|
|
|
|
{
|
|
|
|
unsigned long size;
|
|
|
|
|
|
|
|
if (vmalloc_size_set)
|
|
|
|
return;
|
2020-10-19 11:01:33 +02:00
|
|
|
size = round_up(ident_map_size / 8, _SEGMENT_SIZE);
|
2020-09-18 12:25:37 +02:00
|
|
|
vmalloc_size = max(size, vmalloc_size);
|
|
|
|
}
|
|
|
|
|
2024-03-22 14:39:57 +01:00
|
|
|
static void kaslr_adjust_vmlinux_info(long offset)
|
2021-08-06 12:55:09 +02:00
|
|
|
{
|
|
|
|
vmlinux.bootdata_off += offset;
|
|
|
|
vmlinux.bootdata_preserved_off += offset;
|
2024-02-21 11:51:55 +01:00
|
|
|
vmlinux.got_start += offset;
|
|
|
|
vmlinux.got_end += offset;
|
2022-12-13 11:35:11 +01:00
|
|
|
vmlinux.init_mm_off += offset;
|
|
|
|
vmlinux.swapper_pg_dir_off += offset;
|
|
|
|
vmlinux.invalid_pg_dir_off += offset;
|
2023-02-09 22:05:11 +01:00
|
|
|
#ifdef CONFIG_KASAN
|
|
|
|
vmlinux.kasan_early_shadow_page_off += offset;
|
|
|
|
vmlinux.kasan_early_shadow_pte_off += offset;
|
|
|
|
vmlinux.kasan_early_shadow_pmd_off += offset;
|
|
|
|
vmlinux.kasan_early_shadow_pud_off += offset;
|
|
|
|
vmlinux.kasan_early_shadow_p4d_off += offset;
|
|
|
|
#endif
|
2021-08-06 12:55:09 +02:00
|
|
|
}
|
|
|
|
|
2024-03-22 14:39:57 +01:00
|
|
|
static void fixup_vmlinux_info(void)
|
|
|
|
{
|
|
|
|
vmlinux.entry -= __START_KERNEL;
|
|
|
|
kaslr_adjust_vmlinux_info(-__START_KERNEL);
|
|
|
|
}
|
|
|
|
|
2018-07-19 13:11:28 +02:00
|
|
|
void startup_kernel(void)
|
|
|
|
{
|
2024-03-22 14:39:57 +01:00
|
|
|
unsigned long kernel_size = vmlinux.image_size + vmlinux.bss_size;
|
|
|
|
unsigned long nokaslr_offset_phys = mem_safe_offset();
|
2023-03-15 13:54:14 +01:00
|
|
|
unsigned long amode31_lma = 0;
|
2024-03-22 14:39:57 +01:00
|
|
|
unsigned long max_physmem_end;
|
2022-12-13 11:35:11 +01:00
|
|
|
unsigned long asce_limit;
|
2023-02-02 13:59:36 +01:00
|
|
|
unsigned long safe_addr;
|
2022-12-13 11:35:11 +01:00
|
|
|
psw_t psw;
|
2018-07-19 13:11:28 +02:00
|
|
|
|
2024-03-22 14:39:57 +01:00
|
|
|
fixup_vmlinux_info();
|
2023-02-02 13:59:36 +01:00
|
|
|
setup_lpp();
|
2024-03-22 14:39:57 +01:00
|
|
|
safe_addr = PAGE_ALIGN(nokaslr_offset_phys + kernel_size);
|
2023-08-05 10:59:09 +02:00
|
|
|
|
2023-02-02 13:59:36 +01:00
|
|
|
/*
|
2024-03-22 14:39:57 +01:00
|
|
|
* Reserve decompressor memory together with decompression heap,
|
|
|
|
* buffer and memory which might be occupied by uncompressed kernel
|
|
|
|
* (if KASLR is off or failed).
|
2023-02-02 13:59:36 +01:00
|
|
|
*/
|
|
|
|
physmem_reserve(RR_DECOMPRESSOR, 0, safe_addr);
|
|
|
|
if (IS_ENABLED(CONFIG_BLK_DEV_INITRD) && parmarea.initrd_size)
|
|
|
|
physmem_reserve(RR_INITRD, parmarea.initrd_start, parmarea.initrd_size);
|
2021-06-15 14:25:41 +02:00
|
|
|
oldmem_data.start = parmarea.oldmem_base;
|
|
|
|
oldmem_data.size = parmarea.oldmem_size;
|
2021-06-15 14:15:07 +02:00
|
|
|
|
2019-02-21 14:23:04 +01:00
|
|
|
store_ipl_parmblock();
|
2023-02-02 13:59:36 +01:00
|
|
|
read_ipl_report();
|
2019-04-01 19:11:03 +02:00
|
|
|
uv_query_info();
|
2018-05-23 11:07:13 +02:00
|
|
|
sclp_early_read_info();
|
2018-05-15 13:28:53 +02:00
|
|
|
setup_boot_command_line();
|
2019-02-27 16:52:42 +01:00
|
|
|
parse_boot_command_line();
|
2023-02-02 19:21:38 +01:00
|
|
|
detect_facilities();
|
2023-10-27 14:12:36 +02:00
|
|
|
cmma_init();
|
2021-07-05 19:37:25 +02:00
|
|
|
sanitize_prot_virt_host();
|
2023-02-02 13:59:36 +01:00
|
|
|
max_physmem_end = detect_max_physmem_end();
|
2023-01-23 12:49:47 +01:00
|
|
|
setup_ident_map_size(max_physmem_end);
|
2020-09-18 12:25:37 +02:00
|
|
|
setup_vmalloc_size();
|
s390/mm: Uncouple physical vs virtual address spaces
The uncoupling physical vs virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how a particular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should locate next to each other
to prevent failures and extra reworks in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarly;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
do as little machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address is randomized
within a memory window that spans the whole unused virtual address
space. The size of that window depends on the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcefully set to 2GB, which
amounts to 15 bits of entropy if KASAN is enabled or 17
bits of entropy in default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
asce_limit = setup_kernel_memory_layout(kernel_size);
|
2023-02-02 13:59:36 +01:00
|
|
|
/* got final ident_map_size, physmem allocations could be performed now */
|
2023-02-08 18:11:25 +01:00
|
|
|
physmem_set_usable_limit(ident_map_size);
|
2023-02-02 13:59:36 +01:00
|
|
|
detect_physmem_online_ranges(max_physmem_end);
|
|
|
|
save_ipl_cert_comp_list();
|
|
|
|
rescue_initrd(safe_addr, ident_map_size);
|
2019-02-03 21:37:20 +01:00
|
|
|
|
2024-03-22 14:39:57 +01:00
|
|
|
if (kaslr_enabled())
|
|
|
|
__kaslr_offset_phys = randomize_within_range(kernel_size, THREAD_SIZE, 0, ident_map_size);
|
|
|
|
if (!__kaslr_offset_phys)
|
|
|
|
__kaslr_offset_phys = nokaslr_offset_phys;
|
|
|
|
kaslr_adjust_vmlinux_info(__kaslr_offset_phys);
|
|
|
|
physmem_reserve(RR_VMLINUX, __kaslr_offset_phys, kernel_size);
|
|
|
|
deploy_kernel((void *)__kaslr_offset_phys);
|
2023-02-02 13:59:36 +01:00
|
|
|
|
|
|
|
/* vmlinux decompression is done, shrink reserved low memory */
|
|
|
|
physmem_reserve(RR_DECOMPRESSOR, 0, (unsigned long)_decompressor_end);
|
2024-02-26 11:47:40 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* In case KASLR is enabled the randomized location of .amode31
|
|
|
|
* section might overlap with .vmlinux.relocs section. To avoid that
|
|
|
|
* the below randomize_within_range() could have been called with
|
|
|
|
* __vmlinux_relocs_64_end as the lower range address. However,
|
|
|
|
* .amode31 section is written to by the decompressed kernel - at
|
|
|
|
* that time the contents of .vmlinux.relocs is not needed anymore.
|
|
|
|
* Conversely, .vmlinux.relocs is read only by the decompressor, even
|
|
|
|
* before the kernel started. Therefore, in case the two sections
|
|
|
|
* overlap there is no risk of corrupting any data.
|
|
|
|
*/
|
2023-03-15 13:54:14 +01:00
|
|
|
if (kaslr_enabled())
|
|
|
|
amode31_lma = randomize_within_range(vmlinux.amode31_size, PAGE_SIZE, 0, SZ_2G);
|
2024-03-22 14:39:57 +01:00
|
|
|
if (!amode31_lma)
|
|
|
|
amode31_lma = __kaslr_offset_phys - vmlinux.amode31_size;
|
2023-03-15 11:00:19 +01:00
|
|
|
physmem_reserve(RR_AMODE31, amode31_lma, vmlinux.amode31_size);
|
2019-02-03 21:37:20 +01:00
|
|
|
|
2022-12-13 11:35:11 +01:00
|
|
|
/*
|
|
|
|
* The order of the following operations is important:
|
|
|
|
*
|
2024-02-21 14:46:53 +01:00
|
|
|
* - kaslr_adjust_relocs() must follow clear_bss_section() to establish
|
|
|
|
* static memory references to data in .bss to be used by setup_vmem()
|
2022-12-13 11:35:11 +01:00
|
|
|
* (i.e init_mm.pgd)
|
|
|
|
*
|
s390: compile relocatable kernel without -fPIE
On s390, currently kernel uses the '-fPIE' compiler flag for compiling
vmlinux. This has a few problems:
- It uses dynamic symbols (.dynsym), for which the linker refuses to
allow more than 64k sections. This can break features which use
'-ffunction-sections' and '-fdata-sections', including kpatch-build
[1] and Function Granular KASLR.
- It unnecessarily uses GOT relocations, adding an extra layer of
indirection for many memory accesses.
Instead of using '-fPIE', resolve all the relocations at link time and
then manually adjust any absolute relocations (R_390_64) during boot.
This is done by first telling the linker to preserve all relocations
during the vmlinux link. (Note this is harmless: they are later
stripped in the vmlinux.bin link.)
Then use the 'relocs' tool to find all absolute relocations (R_390_64)
which apply to allocatable sections. The offsets of those relocations
are saved in a special section which is then used to adjust the
relocations during boot.
(Note: For some reason, Clang occasionally creates a GOT reference, even
without '-fPIE'. So Clang-compiled kernels have a GOT, which needs to
be adjusted.)
On my mostly-defconfig kernel, this reduces kernel text size by ~1.3%.
[1] https://github.com/dynup/kpatch/issues/1284
[2] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622872.html
[3] https://gcc.gnu.org/pipermail/gcc-patches/2023-August/625986.html
Compiler consideration:
Gcc recently implemented an optimization [2] for loading symbols without
explicit alignment, aligning with the IBM Z ELF ABI. This ABI mandates
symbols to reside on a 2-byte boundary, enabling the use of the larl
instruction. However, kernel linker scripts may still generate unaligned
symbols. To address this, a new -munaligned-symbols option has been
introduced [3] in recent gcc versions. This option has to be used with
future gcc versions.
Older Clang lacks support for handling unaligned symbols generated
by kernel linker scripts when the kernel is built without -fPIE. However,
future versions of Clang will include support for the -munaligned-symbols
option. When the support is unavailable, compile the kernel with -fPIE
to maintain the existing behavior.
In addition to it:
move vmlinux.relocs to safe relocation
When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
uncompressed vmlinux.bin is positioned in the bzImage decompressor
image at the default kernel LMA of 0x100000, enabling it to be executed
in-place. However, the size of .vmlinux.relocs could be large enough to
cause an overlap with the uncompressed kernel at the address 0x100000.
To address this issue, .vmlinux.relocs is positioned after the
.rodata.compressed in the bzImage. Nevertheless, in this configuration,
vmlinux.relocs will overlap with the .bss section of vmlinux.bin. To
overcome that, move vmlinux.relocs to a safe location before clearing
.bss and handling relocs.
Compile warning fix from Sumanth Korikkar:
When kernel is built with CONFIG_LD_ORPHAN_WARN and -fno-PIE, there are
several warnings:
ld: warning: orphan section `.rela.iplt' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.head.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.init.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.rodata.cst8' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
Orphan sections are sections that exist in an object file but don't have
a corresponding output section in the final executable. ld raises a
warning when it identifies such sections.
Eliminate the warning by placing all .rela orphan sections in .rela.dyn
and raise an error when size of .rela.dyn is greater than zero. i.e.
Dont just neglect orphan sections.
This is similar to adjustment performed in x86, where kernel is built
with -fno-PIE.
commit 5354e84598f2 ("x86/build: Add asserts for unwanted sections")
[sumanthk@linux.ibm.com: rebased Josh Poimboeuf patches and move
vmlinux.relocs to safe location]
[hca@linux.ibm.com: merged compile warning fix from Sumanth]
Tested-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/r/20240219132734.22881-4-sumanthk@linux.ibm.com
Link: https://lore.kernel.org/r/20240219132734.22881-5-sumanthk@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2024-02-19 14:27:33 +01:00
|
|
|
* - setup_vmem() must follow kaslr_adjust_relocs() to be able to use
|
2022-12-13 11:35:11 +01:00
|
|
|
* static memory references to data in .bss (i.e init_mm.pgd)
|
|
|
|
*
|
s390: compile relocatable kernel without -fPIE
On s390, currently kernel uses the '-fPIE' compiler flag for compiling
vmlinux. This has a few problems:
- It uses dynamic symbols (.dynsym), for which the linker refuses to
allow more than 64k sections. This can break features which use
'-ffunction-sections' and '-fdata-sections', including kpatch-build
[1] and Function Granular KASLR.
- It unnecessarily uses GOT relocations, adding an extra layer of
indirection for many memory accesses.
Instead of using '-fPIE', resolve all the relocations at link time and
then manually adjust any absolute relocations (R_390_64) during boot.
This is done by first telling the linker to preserve all relocations
during the vmlinux link. (Note this is harmless: they are later
stripped in the vmlinux.bin link.)
Then use the 'relocs' tool to find all absolute relocations (R_390_64)
which apply to allocatable sections. The offsets of those relocations
are saved in a special section which is then used to adjust the
relocations during boot.
(Note: For some reason, Clang occasionally creates a GOT reference, even
without '-fPIE'. So Clang-compiled kernels have a GOT, which needs to
be adjusted.)
On my mostly-defconfig kernel, this reduces kernel text size by ~1.3%.
[1] https://github.com/dynup/kpatch/issues/1284
[2] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622872.html
[3] https://gcc.gnu.org/pipermail/gcc-patches/2023-August/625986.html
Compiler consideration:
Gcc recently implemented an optimization [2] for loading symbols without
explicit alignment, aligning with the IBM Z ELF ABI. This ABI mandates
symbols to reside on a 2-byte boundary, enabling the use of the larl
instruction. However, kernel linker scripts may still generate unaligned
symbols. To address this, a new -munaligned-symbols option has been
introduced [3] in recent gcc versions. This option has to be used with
future gcc versions.
Older Clang lacks support for handling unaligned symbols generated
by kernel linker scripts when the kernel is built without -fPIE. However,
future versions of Clang will include support for the -munaligned-symbols
option. When the support is unavailable, compile the kernel with -fPIE
to maintain the existing behavior.
In addition to it:
move vmlinux.relocs to safe relocation
When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
uncompressed vmlinux.bin is positioned in the bzImage decompressor
image at the default kernel LMA of 0x100000, enabling it to be executed
in-place. However, the size of .vmlinux.relocs could be large enough to
cause an overlap with the uncompressed kernel at the address 0x100000.
To address this issue, .vmlinux.relocs is positioned after the
.rodata.compressed in the bzImage. Nevertheless, in this configuration,
vmlinux.relocs will overlap with the .bss section of vmlinux.bin. To
overcome that, move vmlinux.relocs to a safe location before clearing
.bss and handling relocs.
Compile warning fix from Sumanth Korikkar:
When kernel is built with CONFIG_LD_ORPHAN_WARN and -fno-PIE, there are
several warnings:
ld: warning: orphan section `.rela.iplt' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.head.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.init.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.rodata.cst8' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
Orphan sections are sections that exist in an object file but don't have
a corresponding output section in the final executable. ld raises a
warning when it identifies such sections.
Eliminate the warning by placing all .rela orphan sections in .rela.dyn
and raise an error when size of .rela.dyn is greater than zero. i.e.
Dont just neglect orphan sections.
This is similar to adjustment performed in x86, where kernel is built
with -fno-PIE.
commit 5354e84598f2 ("x86/build: Add asserts for unwanted sections")
[sumanthk@linux.ibm.com: rebased Josh Poimboeuf patches and move
vmlinux.relocs to safe location]
[hca@linux.ibm.com: merged compile warning fix from Sumanth]
Tested-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/r/20240219132734.22881-4-sumanthk@linux.ibm.com
Link: https://lore.kernel.org/r/20240219132734.22881-5-sumanthk@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2024-02-19 14:27:33 +01:00
|
|
|
* - copy_bootdata() must follow setup_vmem() to propagate changes
|
|
|
|
* to bootdata made by setup_vmem()
|
2022-12-13 11:35:11 +01:00
|
|
|
*/
|
2024-03-22 14:39:57 +01:00
|
|
|
clear_bss_section(__kaslr_offset_phys);
|
|
|
|
kaslr_adjust_relocs(__kaslr_offset_phys, __kaslr_offset_phys + vmlinux.image_size,
|
2024-02-20 14:35:43 +01:00
|
|
|
__kaslr_offset, __kaslr_offset_phys);
|
s390: compile relocatable kernel without -fPIE
On s390, currently kernel uses the '-fPIE' compiler flag for compiling
vmlinux. This has a few problems:
- It uses dynamic symbols (.dynsym), for which the linker refuses to
allow more than 64k sections. This can break features which use
'-ffunction-sections' and '-fdata-sections', including kpatch-build
[1] and Function Granular KASLR.
- It unnecessarily uses GOT relocations, adding an extra layer of
indirection for many memory accesses.
Instead of using '-fPIE', resolve all the relocations at link time and
then manually adjust any absolute relocations (R_390_64) during boot.
This is done by first telling the linker to preserve all relocations
during the vmlinux link. (Note this is harmless: they are later
stripped in the vmlinux.bin link.)
Then use the 'relocs' tool to find all absolute relocations (R_390_64)
which apply to allocatable sections. The offsets of those relocations
are saved in a special section which is then used to adjust the
relocations during boot.
(Note: For some reason, Clang occasionally creates a GOT reference, even
without '-fPIE'. So Clang-compiled kernels have a GOT, which needs to
be adjusted.)
On my mostly-defconfig kernel, this reduces kernel text size by ~1.3%.
[1] https://github.com/dynup/kpatch/issues/1284
[2] https://gcc.gnu.org/pipermail/gcc-patches/2023-June/622872.html
[3] https://gcc.gnu.org/pipermail/gcc-patches/2023-August/625986.html
Compiler consideration:
Gcc recently implemented an optimization [2] for loading symbols without
explicit alignment, aligning with the IBM Z ELF ABI. This ABI mandates
symbols to reside on a 2-byte boundary, enabling the use of the larl
instruction. However, kernel linker scripts may still generate unaligned
symbols. To address this, a new -munaligned-symbols option has been
introduced [3] in recent gcc versions. This option has to be used with
future gcc versions.
Older Clang lacks support for handling unaligned symbols generated
by kernel linker scripts when the kernel is built without -fPIE. However,
future versions of Clang will include support for the -munaligned-symbols
option. When the support is unavailable, compile the kernel with -fPIE
to maintain the existing behavior.
In addition to it:
move vmlinux.relocs to safe relocation
When the kernel is built with CONFIG_KERNEL_UNCOMPRESSED, the entire
uncompressed vmlinux.bin is positioned in the bzImage decompressor
image at the default kernel LMA of 0x100000, enabling it to be executed
in-place. However, the size of .vmlinux.relocs could be large enough to
cause an overlap with the uncompressed kernel at the address 0x100000.
To address this issue, .vmlinux.relocs is positioned after the
.rodata.compressed in the bzImage. Nevertheless, in this configuration,
vmlinux.relocs will overlap with the .bss section of vmlinux.bin. To
overcome that, move vmlinux.relocs to a safe location before clearing
.bss and handling relocs.
Compile warning fix from Sumanth Korikkar:
When kernel is built with CONFIG_LD_ORPHAN_WARN and -fno-PIE, there are
several warnings:
ld: warning: orphan section `.rela.iplt' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.head.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.init.text' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
ld: warning: orphan section `.rela.rodata.cst8' from
`arch/s390/kernel/head64.o' being placed in section `.rela.dyn'
Orphan sections are sections that exist in an object file but don't have
a corresponding output section in the final executable. ld raises a
warning when it identifies such sections.
Eliminate the warning by placing all .rela orphan sections in .rela.dyn
and raise an error when size of .rela.dyn is greater than zero. i.e.
Dont just neglect orphan sections.
This is similar to adjustment performed in x86, where kernel is built
with -fno-PIE.
commit 5354e84598f2 ("x86/build: Add asserts for unwanted sections")
[sumanthk@linux.ibm.com: rebased Josh Poimboeuf patches and move
vmlinux.relocs to safe location]
[hca@linux.ibm.com: merged compile warning fix from Sumanth]
Tested-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Acked-by: Vasily Gorbik <gor@linux.ibm.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/r/20240219132734.22881-4-sumanthk@linux.ibm.com
Link: https://lore.kernel.org/r/20240219132734.22881-5-sumanthk@linux.ibm.com
Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
2024-02-19 14:27:33 +01:00
|
|
|
kaslr_adjust_got(__kaslr_offset);
|
s390/mm: Uncouple physical vs virtual address spaces
The uncoupling physical vs virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how a particular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should locate next to each other
to prevent failures and extra reworks in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarly;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
do as little machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address is randomized
within a memory window that spans the whole unused virtual address
space. The size of that window depends on the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcefully set to 2GB, which
amounts to 15 bits of entropy if KASAN is enabled or 17
bits of entropy in default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
setup_vmem(__kaslr_offset, __kaslr_offset + kernel_size, asce_limit);
|
2022-12-13 11:35:11 +01:00
|
|
|
copy_bootdata();
|
2019-02-03 21:37:20 +01:00
|
|
|
|
2023-02-02 13:59:36 +01:00
|
|
|
/*
|
|
|
|
* Save KASLR offset for early dumps, before vmcore_info is set.
|
|
|
|
* Mark as uneven to distinguish from real vmcore_info pointer.
|
|
|
|
*/
|
2024-02-20 14:35:43 +01:00
|
|
|
S390_lowcore.vmcore_info = __kaslr_offset_phys ? __kaslr_offset_phys | 0x1UL : 0;
|
2022-12-13 11:35:11 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Jump to the decompressed kernel entry point and switch DAT mode on.
|
|
|
|
*/
|
s390/mm: Uncouple physical vs virtual address spaces
Uncoupling the physical and virtual address spaces brings
the following benefits to s390:
- virtual memory layout flexibility;
- closes the address gap between kernel and modules, it
caused s390-only problems in the past (e.g. 'perf' bugs);
- allows getting rid of trampolines used for module calls
into kernel;
- allows simplifying BPF trampoline;
- minor performance improvement in branch prediction;
- kernel randomization entropy is an order of magnitude bigger, as it is
derived from the amount of available virtual, not physical
memory;
The whole change could be described in two pictures below:
before and after the change.
Some aspects of the virtual memory layout setup are not
clarified (number of page levels, alignment, DMA memory),
since these are not a part of this change or secondary
with regard to how the uncoupling itself is implemented.
The focus of the pictures is to explain why __va() and __pa()
macros are implemented the way they are.
Memory layout in V==R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+ identity mapping start
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | identity | phys == virt
| | mapping | virt == phys
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end
| | |
| | |
+- __kaslr_offset, __kaslr_offset_phys| kernel rand. phys/virt start
| | |
| kernel text/data | kernel text/data | phys == kvirt
| | |
+------------------+------------------+ kernel phys/virt end
| | |
| | |
| | |
| | |
+- ident_map_size -+- ident_map_size -+ identity mapping end
| |
| ... unused gap |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Memory layout in V!=R mode:
| Physical | Virtual |
+- 0 --------------+- 0 --------------+
| | S390_lowcore | Low-address memory
| +- 8 KB -----------+
| | |
| | |
| | ... unused gap |
| | |
+- AMODE31_START --+- AMODE31_START --+ .amode31 rand. phys/virt start
|.amode31 text/data|.amode31 text/data|
+- AMODE31_END ----+- AMODE31_END ----+ .amode31 rand. phys/virt end (<2GB)
| | |
| | |
+- __kaslr_offset_phys | kernel rand. phys start
| | |
| kernel text/data | |
| | |
+------------------+ | kernel phys end
| | |
| | |
| | |
| | |
+- ident_map_size -+ |
| |
| ... unused gap |
| |
+- __identity_base + identity mapping start (>= 2GB)
| |
| identity | phys == virt - __identity_base
| mapping | virt == phys + __identity_base
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
+---- vmemmap -----+ 'struct page' array start
| |
| virtually mapped |
| memory map |
| |
+- __abs_lowcore --+
| |
| Absolute Lowcore |
| |
+- __memcpy_real_area
| |
| Real Memory Copy|
| |
+- VMALLOC_START --+ vmalloc area start
| |
| vmalloc area |
| |
+- MODULES_VADDR --+ modules area start
| |
| modules area |
| |
+- __kaslr_offset -+ kernel rand. virt start
| |
| kernel text/data | phys == (kvirt - __kaslr_offset) +
| | __kaslr_offset_phys
+- kernel .bss end + kernel rand. virt end
| |
| ... unused gap |
| |
+------------------+ UltraVisor Secure Storage limit
| |
| ... unused gap |
| |
+KASAN_SHADOW_START+ KASAN shadow memory start
| |
| KASAN shadow |
| |
+------------------+ ASCE limit
Unused gaps in the virtual memory layout could be present
or not - depending on how a particular system is configured.
No page tables are created for the unused gaps.
The relative order of vmalloc, modules and kernel image in
virtual memory is defined by the following considerations:
- start of the modules area and end of the kernel should reside
within 4GB to accommodate relative 32-bit jumps. The best way
to achieve that is to place kernel next to modules;
- vmalloc and module areas should locate next to each other
to prevent failures and extra reworks in user level tools
(makedumpfile, crash, etc.) which treat vmalloc and module
addresses similarly;
- kernel needs to be the last area in the virtual memory
layout to easily distinguish between kernel and non-kernel
virtual addresses. That is needed to (again) simplify
handling of addresses in user level tools and make __pa()
macro faster (see below);
Concluding the above, the relative order of the considered
virtual areas in memory is: vmalloc - modules - kernel.
Therefore, the only change to the current memory layout is
moving kernel to the end of virtual address space.
With that approach the implementation of __pa() macro is
straightforward - all linear virtual addresses less than
kernel base are considered identity mapping:
phys == virt - __identity_base
All addresses greater than kernel base are kernel ones:
phys == (kvirt - __kaslr_offset) + __kaslr_offset_phys
By contrast, __va() macro deals only with identity mapping
addresses:
virt == phys + __identity_base
.amode31 section is mapped separately and is not covered by
__pa() macro. In fact, it could have been handled easily by
checking whether a virtual address is within the section or
not, but there is no need for that. Thus, let __pa() code
spend as few machine cycles as possible.
The KASAN shadow memory is located at the very end of the
virtual memory layout, at addresses higher than the kernel.
However, that is not a linear mapping and no code other than
KASAN instrumentation or API is expected to access it.
When KASLR mode is enabled the kernel base address is randomized
within a memory window that spans the whole unused virtual address
space. The size of that window depends on the amount of
physical memory available to the system, the limit imposed by
UltraVisor (if present) and the vmalloc area size as provided
by vmalloc= kernel command line parameter.
In case the virtual memory is exhausted the minimum size of
the randomization window is forcefully set to 2GB, which
amounts to 15 bits of entropy if KASAN is enabled or 17
bits of entropy in default configuration.
The default kernel offset 0x100000 is used as a magic value
both in the decompressor code and vmlinux linker script, but
it will be removed with a follow-up change.
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
2024-03-01 07:15:22 +01:00
|
|
|
psw.addr = __kaslr_offset + vmlinux.entry;
|
2022-12-13 11:35:11 +01:00
|
|
|
psw.mask = PSW_KERNEL_BITS;
|
|
|
|
__load_psw(psw);
|
2018-07-19 13:11:28 +02:00
|
|
|
}
|