From 13f62e84385fa0241fc6a2178da50af02189121b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 2 Nov 2022 15:16:43 +0100 Subject: [PATCH 01/26] s390/cmpxchg: use symbolic names for inline assembly operands Make cmpxchg() inline assemblies more readable by using symbolic names for operands. Link: https://lore.kernel.org/r/Y2J7yzQYt/bjLQXY@osiris Signed-off-by: Heiko Carstens --- arch/s390/include/asm/cmpxchg.h | 76 ++++++++++++++++++--------------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 84c3f0d576c5..56fb8aa08945 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -96,56 +96,64 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, shift = (3 ^ (address & 3)) << 3; address ^= address & 3; asm volatile( - " l %0,%2\n" - "0: nr %0,%5\n" - " lr %1,%0\n" - " or %0,%3\n" - " or %1,%4\n" - " cs %0,%1,%2\n" - " jnl 1f\n" - " xr %1,%0\n" - " nr %1,%5\n" - " jnz 0b\n" + " l %[prev],%[address]\n" + "0: nr %[prev],%[mask]\n" + " lr %[tmp],%[prev]\n" + " or %[prev],%[old]\n" + " or %[tmp],%[new]\n" + " cs %[prev],%[tmp],%[address]\n" + " jnl 1f\n" + " xr %[tmp],%[prev]\n" + " nr %[tmp],%[mask]\n" + " jnz 0b\n" "1:" - : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address) - : "d" ((old & 0xff) << shift), - "d" ((new & 0xff) << shift), - "d" (~(0xff << shift)) + : [prev] "=&d" (prev), + [tmp] "=&d" (tmp), + [address] "+Q" (*(int *)address) + : [old] "d" ((old & 0xff) << shift), + [new] "d" ((new & 0xff) << shift), + [mask] "d" (~(0xff << shift)) : "memory", "cc"); return prev >> shift; case 2: shift = (2 ^ (address & 2)) << 3; address ^= address & 2; asm volatile( - " l %0,%2\n" - "0: nr %0,%5\n" - " lr %1,%0\n" - " or %0,%3\n" - " or %1,%4\n" - " cs %0,%1,%2\n" - " jnl 1f\n" - " xr %1,%0\n" - " nr %1,%5\n" - " jnz 0b\n" + " l %[prev],%[address]\n" + "0: nr %[prev],%[mask]\n" + " lr %[tmp],%[prev]\n" + " or %[prev],%[old]\n" + " or %[tmp],%[new]\n" + " cs %[prev],%[tmp],%[address]\n" + " jnl 1f\n" + " xr %[tmp],%[prev]\n" + " nr %[tmp],%[mask]\n" + " jnz 0b\n" "1:" - : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address) - : "d" ((old & 0xffff) << shift), - "d" ((new & 0xffff) << shift), - "d" (~(0xffff << shift)) + : [prev] "=&d" (prev), + [tmp] "=&d" (tmp), + [address] "+Q" (*(int *)address) + : [old] "d" ((old & 0xffff) << shift), + [new] "d" ((new & 0xffff) << shift), + [mask] "d" (~(0xffff << shift)) : "memory", "cc"); return prev >> shift; case 4: asm volatile( - " cs %0,%3,%1\n" - : "=&d" (prev), "+Q" (*(int *) address) - : "0" (old), "d" (new) + " cs %[prev],%[new],%[address]\n" + : [prev] "=&d" (prev), + [address] "+Q" (*(int *)address) + : "0" (old), + [new] "d" (new) : "memory", "cc"); return prev; case 8: asm volatile( - " csg %0,%3,%1\n" - : "=&d" (prev), "+QS" (*(long *) address) - : "0" (old), "d" (new) + " csg %[prev],%[new],%[address]\n" + : [prev] "=&d" (prev), + [address] "+QS" (*(long *)address) + : "0" (old), + [new] "d" (new) : "memory", "cc"); return prev; } From ce968f654570dbd9cac7de694681640061559d3b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 2 Nov 2022 15:17:28 +0100 Subject: [PATCH 02/26] s390/cmpxchg: make variables local to each case label Make variables local to each case label. This limits the scope of variables and allows to use proper types everywhere. 
Link: https://lore.kernel.org/r/Y2J7+HqgAZwnfxsh@osiris Signed-off-by: Heiko Carstens --- arch/s390/include/asm/cmpxchg.h | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 56fb8aa08945..02165acdaa93 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -88,11 +88,10 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, unsigned long old, unsigned long new, int size) { - unsigned long prev, tmp; - int shift; - switch (size) { - case 1: + case 1: { + unsigned int prev, tmp, shift; + shift = (3 ^ (address & 3)) << 3; address ^= address & 3; asm volatile( @@ -115,7 +114,10 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, [mask] "d" (~(0xff << shift)) : "memory", "cc"); return prev >> shift; - case 2: + } + case 2: { + unsigned int prev, tmp, shift; + shift = (2 ^ (address & 2)) << 3; address ^= address & 2; asm volatile( @@ -138,16 +140,22 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, [mask] "d" (~(0xffff << shift)) : "memory", "cc"); return prev >> shift; - case 4: + } + case 4: { + unsigned int prev; + asm volatile( " cs %[prev],%[new],%[address]\n" : [prev] "=&d" (prev), [address] "+Q" (*(int *)address) - : "0" (old), + : "0" ((unsigned int)old), [new] "d" (new) : "memory", "cc"); return prev; - case 8: + } + case 8: { + unsigned long prev; + asm volatile( " csg %[prev],%[new],%[address]\n" : [prev] "=&d" (prev), @@ -157,6 +165,7 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, : "memory", "cc"); return prev; } + } __cmpxchg_called_with_bad_pointer(); return old; } From e388d66f032166c8e8c4efd9e6c6f43610378903 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 2 Nov 2022 15:18:07 +0100 Subject: [PATCH 03/26] s390/cmpxchg: remove digits from input constraints Instead of using a digit for input constraints simply initialize the corresponding output operand in C code and use a "+" constraint modifier. Link: https://lore.kernel.org/r/Y2J8H82B6JhJhrp2@osiris Signed-off-by: Heiko Carstens --- arch/s390/include/asm/cmpxchg.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 02165acdaa93..1c5785b851ec 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -142,26 +142,24 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, return prev >> shift; } case 4: { - unsigned int prev; + unsigned int prev = old; asm volatile( " cs %[prev],%[new],%[address]\n" - : [prev] "=&d" (prev), + : [prev] "+&d" (prev), [address] "+Q" (*(int *)address) - : "0" ((unsigned int)old), - [new] "d" (new) + : [new] "d" (new) : "memory", "cc"); return prev; } case 8: { - unsigned long prev; + unsigned long prev = old; asm volatile( " csg %[prev],%[new],%[address]\n" - : [prev] "=&d" (prev), + : [prev] "+&d" (prev), [address] "+QS" (*(long *)address) - : "0" (old), - [new] "d" (new) + : [new] "d" (new) : "memory", "cc"); return prev; } From f39a8c4a22ea104c368361b9ae0f550da161db2d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 2 Nov 2022 15:18:45 +0100 Subject: [PATCH 04/26] s390/extable: add EX_TABLE_UA_LOAD_REGPAIR() macro Add new exception table type which is able to handle register pairs. 
If an exception is recognized on such an instruction the specified register pair will be zeroed, and the specified error register will be modified so it contains -EFAULT, similar to the existing EX_TABLE_UA_LOAD_REG() macro. Link: https://lore.kernel.org/r/Y2J8RSW2khWLgpPo@osiris Signed-off-by: Heiko Carstens --- arch/s390/include/asm/asm-extable.h | 4 ++++ arch/s390/mm/extable.c | 9 +++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h index b74f1070ddb2..55a02a153dfc 100644 --- a/arch/s390/include/asm/asm-extable.h +++ b/arch/s390/include/asm/asm-extable.h @@ -12,6 +12,7 @@ #define EX_TYPE_UA_STORE 3 #define EX_TYPE_UA_LOAD_MEM 4 #define EX_TYPE_UA_LOAD_REG 5 +#define EX_TYPE_UA_LOAD_REGPAIR 6 #define EX_DATA_REG_ERR_SHIFT 0 #define EX_DATA_REG_ERR GENMASK(3, 0) @@ -85,4 +86,7 @@ #define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \ __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0) +#define EX_TABLE_UA_LOAD_REGPAIR(_fault, _target, _regerr, _regzero) \ + __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0) + #endif /* __ASM_EXTABLE_H */ diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c index 1e4d2187541a..fe87291df95d 100644 --- a/arch/s390/mm/extable.c +++ b/arch/s390/mm/extable.c @@ -47,13 +47,16 @@ static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struc return true; } -static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, struct pt_regs *regs) +static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, + bool pair, struct pt_regs *regs) { unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data); unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data); regs->gprs[reg_err] = -EFAULT; regs->gprs[reg_zero] = 0; + if (pair) + regs->gprs[reg_zero + 1] = 0; regs->psw.addr = extable_fixup(ex); return true; } @@ -75,7 +78,9 @@ bool fixup_exception(struct pt_regs *regs) case EX_TYPE_UA_LOAD_MEM: return ex_handler_ua_load_mem(ex, regs); case EX_TYPE_UA_LOAD_REG: - return ex_handler_ua_load_reg(ex, regs); + return ex_handler_ua_load_reg(ex, false, regs); + case EX_TYPE_UA_LOAD_REGPAIR: + return ex_handler_ua_load_reg(ex, true, regs); } panic("invalid exception table entry"); } From 4148575abe1e14af3cb9fd1a3c9c2a708ec0b1f9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 2 Nov 2022 15:19:23 +0100 Subject: [PATCH 05/26] s390/uaccess: add cmpxchg_user_key() Add cmpxchg_user_key() which allows to execute a compare and exchange on a user space address. This allows also to specify a storage key which makes sure that key-controlled protection is considered. This is based on a patch written by Janis Schoetterl-Glausch. 
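For illustration, a caller of the new interface could look roughly like the sketch below. This is a hypothetical usage example and not part of the patch; the helper name, the -EBUSY return value and the access key value are made up.

    /*
     * Hypothetical usage sketch: try to replace a 4 byte value at a user
     * address, using access key 9 for the key-controlled protection check.
     */
    static int try_replace_user_val(unsigned int __user *uptr,
                                    unsigned int old, unsigned int new)
    {
            unsigned int uval;
            int rc;

            rc = cmpxchg_user_key(uptr, &uval, old, new, 9);
            if (rc)
                    return rc;      /* -EFAULT: exception while accessing *uptr */
            /* the caller compares uval and old to see if the exchange happened */
            return uval == old ? 0 : -EBUSY;
    }
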
Link: https://lore.kernel.org/all/20220930210751.225873-2-scgl@linux.ibm.com Cc: Janis Schoetterl-Glausch Link: https://lore.kernel.org/r/Y2J8axs+bcQ2dO/l@osiris Signed-off-by: Heiko Carstens --- arch/s390/include/asm/uaccess.h | 183 ++++++++++++++++++++++++++++++++ 1 file changed, 183 insertions(+) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index f7038b800cc3..a125e60a1521 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -390,4 +390,187 @@ do { \ goto err_label; \ } while (0) +void __cmpxchg_user_key_called_with_bad_pointer(void); + +static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, + __uint128_t old, __uint128_t new, + unsigned long key, int size) +{ + int rc = 0; + + switch (size) { + case 1: { + unsigned int prev, tmp, shift; + + shift = (3 ^ (address & 3)) << 3; + address ^= address & 3; + asm volatile( + " spka 0(%[key])\n" + " sacf 256\n" + "0: l %[prev],%[address]\n" + "1: nr %[prev],%[mask]\n" + " lr %[tmp],%[prev]\n" + " or %[prev],%[old]\n" + " or %[tmp],%[new]\n" + "2: cs %[prev],%[tmp],%[address]\n" + "3: jnl 4f\n" + " xr %[tmp],%[prev]\n" + " nr %[tmp],%[mask]\n" + " jnz 1b\n" + "4: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE_UA_LOAD_REG(0b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(2b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(3b, 4b, %[rc], %[prev]) + : [rc] "+&d" (rc), + [prev] "=&d" (prev), + [tmp] "=&d" (tmp), + [address] "+Q" (*(int *)address) + : [old] "d" (((unsigned int)old & 0xff) << shift), + [new] "d" (((unsigned int)new & 0xff) << shift), + [mask] "d" (~(0xff << shift)), + [key] "a" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + *(unsigned char *)uval = prev >> shift; + return rc; + } + case 2: { + unsigned int prev, tmp, shift; + + shift = (2 ^ (address & 2)) << 3; + address ^= address & 2; + asm volatile( + " spka 0(%[key])\n" + " sacf 256\n" + "0: l %[prev],%[address]\n" + "1: nr %[prev],%[mask]\n" + " lr %[tmp],%[prev]\n" + " or %[prev],%[old]\n" + " or %[tmp],%[new]\n" + "2: cs %[prev],%[tmp],%[address]\n" + "3: jnl 4f\n" + " xr %[tmp],%[prev]\n" + " nr %[tmp],%[mask]\n" + " jnz 1b\n" + "4: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE_UA_LOAD_REG(0b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(2b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(3b, 4b, %[rc], %[prev]) + : [rc] "+&d" (rc), + [prev] "=&d" (prev), + [tmp] "=&d" (tmp), + [address] "+Q" (*(int *)address) + : [old] "d" (((unsigned int)old & 0xffff) << shift), + [new] "d" (((unsigned int)new & 0xffff) << shift), + [mask] "d" (~(0xffff << shift)), + [key] "a" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + *(unsigned short *)uval = prev >> shift; + return rc; + } + case 4: { + unsigned int prev = old; + + asm volatile( + " spka 0(%[key])\n" + " sacf 256\n" + "0: cs %[prev],%[new],%[address]\n" + "1: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev]) + : [rc] "+&d" (rc), + [prev] "+&d" (prev), + [address] "+Q" (*(int *)address) + : [new] "d" ((unsigned int)new), + [key] "a" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + *(unsigned int *)uval = prev; + return rc; + } + case 8: { + unsigned long prev = old; + + asm volatile( + " spka 0(%[key])\n" + " sacf 256\n" + "0: csg %[prev],%[new],%[address]\n" + "1: sacf 768\n" + " spka 
%[default_key]\n" + EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev]) + : [rc] "+&d" (rc), + [prev] "+&d" (prev), + [address] "+QS" (*(long *)address) + : [new] "d" ((unsigned long)new), + [key] "a" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + *(unsigned long *)uval = prev; + return rc; + } + case 16: { + __uint128_t prev = old; + + asm volatile( + " spka 0(%[key])\n" + " sacf 256\n" + "0: cdsg %[prev],%[new],%[address]\n" + "1: sacf 768\n" + " spka %[default_key]\n" + EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev]) + : [rc] "+&d" (rc), + [prev] "+&d" (prev), + [address] "+QS" (*(__int128_t *)address) + : [new] "d" (new), + [key] "a" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY) + : "memory", "cc"); + *(__uint128_t *)uval = prev; + return rc; + } + } + __cmpxchg_user_key_called_with_bad_pointer(); + return rc; +} + +/** + * cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys + * @ptr: User space address of value to compare to @old and exchange with + * @new. Must be aligned to sizeof(*@ptr). + * @uval: Address where the old value of *@ptr is written to. + * @old: Old value. Compared to the content pointed to by @ptr in order to + * determine if the exchange occurs. The old value read from *@ptr is + * written to *@uval. + * @new: New value to place at *@ptr. + * @key: Access key to use for checking storage key protection. + * + * Perform a cmpxchg on a user space target, honoring storage key protection. + * @key alone determines how key checking is performed, neither + * storage-protection-override nor fetch-protection-override apply. + * The caller must compare *@uval and @old to determine if values have been + * exchanged. In case of an exception *@uval is set to zero. + * + * Return: 0: cmpxchg executed + * -EFAULT: an exception happened when trying to access *@ptr + */ +#define cmpxchg_user_key(ptr, uval, old, new, key) \ +({ \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(uval) __uval = (uval); \ + \ + BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \ + might_fault(); \ + __chk_user_ptr(__ptr); \ + __cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \ + (old), (new), (key), sizeof(*(__ptr))); \ +}) + #endif /* __S390_UACCESS_H */ From 51098f0eb22e2f54055d75dd25bc84eff07d6d8a Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Wed, 16 Nov 2022 15:47:11 +0100 Subject: [PATCH 06/26] s390/cmpxchg: make loop condition for 1,2 byte cases precise The cmpxchg implementation for 1 and 2 bytes consists of a 4 byte cmpxchg loop. Currently, the decision to retry is imprecise, looping if bits outside the target byte(s) change instead of retrying until the target byte(s) differ from the old value. E.g. if an attempt to exchange (prev_left_0 old_bytes prev_right_0) is made and it fails because the word at the address is (prev_left_1 x prev_right_1) where both x != old_bytes and one of the prev_*_1 values differs from the respective prev_*_0 value, the cmpxchg is retried, even if by a semantic equivalent to a normal cmpxchg, the exchange would fail. Instead exit the loop if x != old_bytes and retry otherwise. 
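Expressed in C, the 1 and 2 byte emulation now behaves roughly like the sketch below. This is an illustrative model only, not the actual implementation (which is inline assembly); byte_mask selects the target byte(s) within the aligned word, old_shifted/new_shifted are already shifted into place.

    /*
     * Illustrative C model of the new loop condition (sketch only):
     * retry the word-sized cmpxchg while the target byte(s) still equal
     * the expected old value, give up as soon as they differ.
     */
    static unsigned int small_cmpxchg_model(unsigned int *aligned, unsigned int byte_mask,
                                            unsigned int old_shifted, unsigned int new_shifted)
    {
            unsigned int word, prev;

            word = READ_ONCE(*aligned);
            while ((word & byte_mask) == old_shifted) {
                    prev = cmpxchg(aligned, word, (word & ~byte_mask) | new_shifted);
                    if (prev == word)
                            break;          /* exchange succeeded */
                    word = prev;            /* word changed, re-check the target byte(s) */
            }
            return word & byte_mask;        /* caller shifts the result back down */
    }
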
Signed-off-by: Janis Schoetterl-Glausch Link: https://lore.kernel.org/r/20221116144711.3811011-1-scgl@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/include/asm/cmpxchg.h | 60 ++++++++++++++----------- arch/s390/include/asm/uaccess.h | 80 ++++++++++++++++++--------------- 2 files changed, 78 insertions(+), 62 deletions(-) diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 1c5785b851ec..3f26416c2ad8 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -90,55 +90,63 @@ static __always_inline unsigned long __cmpxchg(unsigned long address, { switch (size) { case 1: { - unsigned int prev, tmp, shift; + unsigned int prev, shift, mask; shift = (3 ^ (address & 3)) << 3; address ^= address & 3; + old = (old & 0xff) << shift; + new = (new & 0xff) << shift; + mask = ~(0xff << shift); asm volatile( " l %[prev],%[address]\n" - "0: nr %[prev],%[mask]\n" - " lr %[tmp],%[prev]\n" - " or %[prev],%[old]\n" - " or %[tmp],%[new]\n" - " cs %[prev],%[tmp],%[address]\n" + " nr %[prev],%[mask]\n" + " xilf %[mask],0xffffffff\n" + " or %[new],%[prev]\n" + " or %[prev],%[tmp]\n" + "0: lr %[tmp],%[prev]\n" + " cs %[prev],%[new],%[address]\n" " jnl 1f\n" " xr %[tmp],%[prev]\n" + " xr %[new],%[tmp]\n" " nr %[tmp],%[mask]\n" - " jnz 0b\n" + " jz 0b\n" "1:" : [prev] "=&d" (prev), - [tmp] "=&d" (tmp), - [address] "+Q" (*(int *)address) - : [old] "d" ((old & 0xff) << shift), - [new] "d" ((new & 0xff) << shift), - [mask] "d" (~(0xff << shift)) - : "memory", "cc"); + [address] "+Q" (*(int *)address), + [tmp] "+&d" (old), + [new] "+&d" (new), + [mask] "+&d" (mask) + :: "memory", "cc"); return prev >> shift; } case 2: { - unsigned int prev, tmp, shift; + unsigned int prev, shift, mask; shift = (2 ^ (address & 2)) << 3; address ^= address & 2; + old = (old & 0xffff) << shift; + new = (new & 0xffff) << shift; + mask = ~(0xffff << shift); asm volatile( " l %[prev],%[address]\n" - "0: nr %[prev],%[mask]\n" - " lr %[tmp],%[prev]\n" - " or %[prev],%[old]\n" - " or %[tmp],%[new]\n" - " cs %[prev],%[tmp],%[address]\n" + " nr %[prev],%[mask]\n" + " xilf %[mask],0xffffffff\n" + " or %[new],%[prev]\n" + " or %[prev],%[tmp]\n" + "0: lr %[tmp],%[prev]\n" + " cs %[prev],%[new],%[address]\n" " jnl 1f\n" " xr %[tmp],%[prev]\n" + " xr %[new],%[tmp]\n" " nr %[tmp],%[mask]\n" - " jnz 0b\n" + " jz 0b\n" "1:" : [prev] "=&d" (prev), - [tmp] "=&d" (tmp), - [address] "+Q" (*(int *)address) - : [old] "d" ((old & 0xffff) << shift), - [new] "d" ((new & 0xffff) << shift), - [mask] "d" (~(0xffff << shift)) - : "memory", "cc"); + [address] "+Q" (*(int *)address), + [tmp] "+&d" (old), + [new] "+&d" (new), + [mask] "+&d" (mask) + :: "memory", "cc"); return prev >> shift; } case 4: { diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index a125e60a1521..d028ee59e941 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -400,74 +400,82 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, switch (size) { case 1: { - unsigned int prev, tmp, shift; + unsigned int prev, shift, mask, _old, _new; shift = (3 ^ (address & 3)) << 3; address ^= address & 3; + _old = (old & 0xff) << shift; + _new = (new & 0xff) << shift; + mask = ~(0xff << shift); asm volatile( " spka 0(%[key])\n" " sacf 256\n" "0: l %[prev],%[address]\n" "1: nr %[prev],%[mask]\n" - " lr %[tmp],%[prev]\n" - " or %[prev],%[old]\n" - " or %[tmp],%[new]\n" - "2: cs %[prev],%[tmp],%[address]\n" - "3: jnl 4f\n" + " xilf %[mask],0xffffffff\n" + 
" or %[new],%[prev]\n" + " or %[prev],%[tmp]\n" + "2: lr %[tmp],%[prev]\n" + "3: cs %[prev],%[new],%[address]\n" + "4: jnl 5f\n" " xr %[tmp],%[prev]\n" + " xr %[new],%[tmp]\n" " nr %[tmp],%[mask]\n" - " jnz 1b\n" - "4: sacf 768\n" + " jz 2b\n" + "5: sacf 768\n" " spka %[default_key]\n" - EX_TABLE_UA_LOAD_REG(0b, 4b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(1b, 4b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(2b, 4b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(3b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev]) : [rc] "+&d" (rc), [prev] "=&d" (prev), - [tmp] "=&d" (tmp), - [address] "+Q" (*(int *)address) - : [old] "d" (((unsigned int)old & 0xff) << shift), - [new] "d" (((unsigned int)new & 0xff) << shift), - [mask] "d" (~(0xff << shift)), - [key] "a" (key << 4), + [address] "+Q" (*(int *)address), + [tmp] "+&d" (_old), + [new] "+&d" (_new), + [mask] "+&d" (mask) + : [key] "a" (key << 4), [default_key] "J" (PAGE_DEFAULT_KEY) : "memory", "cc"); *(unsigned char *)uval = prev >> shift; return rc; } case 2: { - unsigned int prev, tmp, shift; + unsigned int prev, shift, mask, _old, _new; shift = (2 ^ (address & 2)) << 3; address ^= address & 2; + _old = (old & 0xffff) << shift; + _new = (new & 0xffff) << shift; + mask = ~(0xffff << shift); asm volatile( " spka 0(%[key])\n" " sacf 256\n" "0: l %[prev],%[address]\n" "1: nr %[prev],%[mask]\n" - " lr %[tmp],%[prev]\n" - " or %[prev],%[old]\n" - " or %[tmp],%[new]\n" - "2: cs %[prev],%[tmp],%[address]\n" - "3: jnl 4f\n" + " xilf %[mask],0xffffffff\n" + " or %[new],%[prev]\n" + " or %[prev],%[tmp]\n" + "2: lr %[tmp],%[prev]\n" + "3: cs %[prev],%[new],%[address]\n" + "4: jnl 5f\n" " xr %[tmp],%[prev]\n" + " xr %[new],%[tmp]\n" " nr %[tmp],%[mask]\n" - " jnz 1b\n" - "4: sacf 768\n" + " jz 2b\n" + "5: sacf 768\n" " spka %[default_key]\n" - EX_TABLE_UA_LOAD_REG(0b, 4b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(1b, 4b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(2b, 4b, %[rc], %[prev]) - EX_TABLE_UA_LOAD_REG(3b, 4b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev]) + EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev]) : [rc] "+&d" (rc), [prev] "=&d" (prev), - [tmp] "=&d" (tmp), - [address] "+Q" (*(int *)address) - : [old] "d" (((unsigned int)old & 0xffff) << shift), - [new] "d" (((unsigned int)new & 0xffff) << shift), - [mask] "d" (~(0xffff << shift)), - [key] "a" (key << 4), + [address] "+Q" (*(int *)address), + [tmp] "+&d" (_old), + [new] "+&d" (_new), + [mask] "+&d" (mask) + : [key] "a" (key << 4), [default_key] "J" (PAGE_DEFAULT_KEY) : "memory", "cc"); *(unsigned short *)uval = prev >> shift; From 739ad2e4e15b585a0eaf98b7bdee62b2dd9588c9 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Thu, 17 Nov 2022 11:07:45 +0100 Subject: [PATCH 07/26] s390/uaccess: limit number of retries for cmpxchg_user_key() cmpxchg_user_key() for byte and short values is implemented via a one word cmpxchg loop. Give up trying to perform the cmpxchg if it fails too often because of contention on the cache line. This ensures that the thread cannot become stuck in the kernel. 
Signed-off-by: Janis Schoetterl-Glausch Link: https://lore.kernel.org/r/20221117100745.3253896-1-scgl@linux.ibm.com Signed-off-by: Heiko Carstens --- arch/s390/include/asm/uaccess.h | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index d028ee59e941..7c10f594e747 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -392,6 +392,8 @@ do { \ void __cmpxchg_user_key_called_with_bad_pointer(void); +#define CMPXCHG_USER_KEY_MAX_LOOPS 128 + static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, __uint128_t old, __uint128_t new, unsigned long key, int size) @@ -401,6 +403,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, switch (size) { case 1: { unsigned int prev, shift, mask, _old, _new; + unsigned long count; shift = (3 ^ (address & 3)) << 3; address ^= address & 3; @@ -410,6 +413,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, asm volatile( " spka 0(%[key])\n" " sacf 256\n" + " llill %[count],%[max_loops]\n" "0: l %[prev],%[address]\n" "1: nr %[prev],%[mask]\n" " xilf %[mask],0xffffffff\n" @@ -421,7 +425,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, " xr %[tmp],%[prev]\n" " xr %[new],%[tmp]\n" " nr %[tmp],%[mask]\n" - " jz 2b\n" + " jnz 5f\n" + " brct %[count],2b\n" "5: sacf 768\n" " spka %[default_key]\n" EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev]) @@ -433,15 +438,20 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [address] "+Q" (*(int *)address), [tmp] "+&d" (_old), [new] "+&d" (_new), - [mask] "+&d" (mask) - : [key] "a" (key << 4), - [default_key] "J" (PAGE_DEFAULT_KEY) + [mask] "+&d" (mask), + [count] "=a" (count) + : [key] "%[count]" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY), + [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS) : "memory", "cc"); *(unsigned char *)uval = prev >> shift; + if (!count) + rc = -EAGAIN; return rc; } case 2: { unsigned int prev, shift, mask, _old, _new; + unsigned long count; shift = (2 ^ (address & 2)) << 3; address ^= address & 2; @@ -451,6 +461,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, asm volatile( " spka 0(%[key])\n" " sacf 256\n" + " llill %[count],%[max_loops]\n" "0: l %[prev],%[address]\n" "1: nr %[prev],%[mask]\n" " xilf %[mask],0xffffffff\n" @@ -462,7 +473,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, " xr %[tmp],%[prev]\n" " xr %[new],%[tmp]\n" " nr %[tmp],%[mask]\n" - " jz 2b\n" + " jnz 5f\n" + " brct %[count],2b\n" "5: sacf 768\n" " spka %[default_key]\n" EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev]) @@ -474,11 +486,15 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, [address] "+Q" (*(int *)address), [tmp] "+&d" (_old), [new] "+&d" (_new), - [mask] "+&d" (mask) - : [key] "a" (key << 4), - [default_key] "J" (PAGE_DEFAULT_KEY) + [mask] "+&d" (mask), + [count] "=a" (count) + : [key] "%[count]" (key << 4), + [default_key] "J" (PAGE_DEFAULT_KEY), + [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS) : "memory", "cc"); *(unsigned short *)uval = prev >> shift; + if (!count) + rc = -EAGAIN; return rc; } case 4: { @@ -568,6 +584,7 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, * * Return: 0: cmpxchg executed * -EFAULT: an exception happened when trying to access *@ptr + * -EAGAIN: maxed 
out number of retries (byte and short only) */ #define cmpxchg_user_key(ptr, uval, old, new, key) \ ({ \ From b33d59fb37ddcb6ee65d4fa23cc3d58793d13c5b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 4 Jan 2023 16:55:53 +0100 Subject: [PATCH 08/26] s390/uaccess: avoid __ashlti3() call __cmpxchg_user_key() uses 128 bit types which, depending on compiler and config options, may lead to an __ashlti3() library call. Get rid of that by simply casting the 128 bit values to 32 bit values. Reported-by: kernel test robot Suggested-by: Janis Schoetterl-Glausch Fixes: 51098f0eb22e ("s390/cmpxchg: make loop condition for 1,2 byte cases precise") Link: https://lore.kernel.org/all/4b96b112d5415d08a81d30657feec2c8c3000f7c.camel@linux.ibm.com/ Signed-off-by: Heiko Carstens --- arch/s390/include/asm/uaccess.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h index 7c10f594e747..8a8c64a678c4 100644 --- a/arch/s390/include/asm/uaccess.h +++ b/arch/s390/include/asm/uaccess.h @@ -407,8 +407,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, shift = (3 ^ (address & 3)) << 3; address ^= address & 3; - _old = (old & 0xff) << shift; - _new = (new & 0xff) << shift; + _old = ((unsigned int)old & 0xff) << shift; + _new = ((unsigned int)new & 0xff) << shift; mask = ~(0xff << shift); asm volatile( " spka 0(%[key])\n" @@ -455,8 +455,8 @@ static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval, shift = (2 ^ (address & 2)) << 3; address ^= address & 2; - _old = (old & 0xffff) << shift; - _new = (new & 0xffff) << shift; + _old = ((unsigned int)old & 0xffff) << shift; + _new = ((unsigned int)new & 0xffff) << shift; mask = ~(0xffff << shift); asm volatile( " spka 0(%[key])\n" From f2d3155e2a6bac44d16f04415a321e8707d895c6 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Fri, 27 Jan 2023 15:05:32 +0100 Subject: [PATCH 09/26] KVM: s390: disable migration mode when dirty tracking is disabled Migration mode is a VM attribute which enables tracking of changes in storage attributes (PGSTE). It assumes dirty tracking is enabled on all memslots to keep a dirty bitmap of pages with changed storage attributes. When enabling migration mode, we currently check that dirty tracking is enabled for all memslots. However, userspace can disable dirty tracking without disabling migration mode. Since migration mode is pointless with dirty tracking disabled, disable migration mode whenever userspace disables dirty tracking on any slot. Also update the documentation to clarify that dirty tracking must be enabled when enabling migration mode, which is already enforced by the code in kvm_s390_vm_start_migration(). Also highlight in the documentation for KVM_S390_GET_CMMA_BITS that it can now fail with -EINVAL when dirty tracking is disabled while migration mode is on. Move all the error codes to a table so this stays readable. To disable migration mode, slots_lock should be held, which is taken in kvm_set_memory_region() and thus held in kvm_arch_prepare_memory_region(). Restructure the prepare code a bit so all the sanity checking is done before disabling migration mode. This ensures migration mode isn't disabled when some sanity check fails. 
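From a userspace point of view the expected ordering now is: enable dirty tracking on all memslots first, then start migration mode; clearing KVM_MEM_LOG_DIRTY_PAGES on any slot afterwards turns migration mode off again. A rough sketch (illustrative only, error handling and slot setup omitted):

    struct kvm_userspace_memory_region region = {
            .slot = 0,
            .flags = KVM_MEM_LOG_DIRTY_PAGES,
            /* guest_phys_addr, memory_size, userspace_addr set up as usual */
    };
    struct kvm_device_attr attr = {
            .group = KVM_S390_VM_MIGRATION,
            .attr = KVM_S390_VM_MIGRATION_START,
    };

    ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);  /* dirty tracking on */
    ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);           /* then start migration mode */
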
Cc: stable@vger.kernel.org Fixes: 190df4a212a7 ("KVM: s390: CMMA tracking, ESSA emulation, migration mode") Signed-off-by: Nico Boehr Reviewed-by: Janosch Frank Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20230127140532.230651-2-nrb@linux.ibm.com Message-Id: <20230127140532.230651-2-nrb@linux.ibm.com> [frankja@linux.ibm.com: fixed commit message typo, moved api.rst error table upwards] Signed-off-by: Janosch Frank --- Documentation/virt/kvm/api.rst | 18 ++++++---- Documentation/virt/kvm/devices/vm.rst | 4 +++ arch/s390/kvm/kvm-s390.c | 47 ++++++++++++++++++--------- 3 files changed, 48 insertions(+), 21 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index deb494f759ed..8cd7fd05d53b 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4449,6 +4449,18 @@ not holding a previously reported uncorrected error). :Parameters: struct kvm_s390_cmma_log (in, out) :Returns: 0 on success, a negative value on error +Errors: + + ====== ============================================================= + ENOMEM not enough memory can be allocated to complete the task + ENXIO if CMMA is not enabled + EINVAL if KVM_S390_CMMA_PEEK is not set but migration mode was not enabled + EINVAL if KVM_S390_CMMA_PEEK is not set but dirty tracking has been + disabled (and thus migration mode was automatically disabled) + EFAULT if the userspace address is invalid or if no page table is + present for the addresses (e.g. when using hugepages). + ====== ============================================================= + This ioctl is used to get the values of the CMMA bits on the s390 architecture. It is meant to be used in two scenarios: @@ -4529,12 +4541,6 @@ mask is unused. values points to the userspace buffer where the result will be stored. -This ioctl can fail with -ENOMEM if not enough memory can be allocated to -complete the task, with -ENXIO if CMMA is not enabled, with -EINVAL if -KVM_S390_CMMA_PEEK is not set but migration mode was not enabled, with --EFAULT if the userspace address is invalid or if no page table is -present for the addresses (e.g. when using hugepages). - 4.108 KVM_S390_SET_CMMA_BITS ---------------------------- diff --git a/Documentation/virt/kvm/devices/vm.rst b/Documentation/virt/kvm/devices/vm.rst index 60acc39e0e93..147efec626e5 100644 --- a/Documentation/virt/kvm/devices/vm.rst +++ b/Documentation/virt/kvm/devices/vm.rst @@ -302,6 +302,10 @@ Allows userspace to start migration mode, needed for PGSTE migration. Setting this attribute when migration mode is already active will have no effects. +Dirty tracking must be enabled on all memslots, else -EINVAL is returned. When +dirty tracking is disabled on any memslot, migration mode is automatically +stopped. + :Parameters: none :Returns: -ENOMEM if there is not enough free memory to start migration mode; -EINVAL if the state of the VM is invalid (e.g. no memory defined); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e4890e04b210..cb72f9a09fb3 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -5633,23 +5633,40 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (kvm_s390_pv_get_handle(kvm)) return -EINVAL; - if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY) + if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) { + /* + * A few sanity checks. We can have memory slots which have to be + * located/ended at a segment boundary (1MB). 
The memory in userland is + * ok to be fragmented into various different vmas. It is okay to mmap() + * and munmap() stuff in this slot after doing this call at any time + */ + + if (new->userspace_addr & 0xffffful) + return -EINVAL; + + size = new->npages * PAGE_SIZE; + if (size & 0xffffful) + return -EINVAL; + + if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) + return -EINVAL; + } + + if (!kvm->arch.migration_mode) return 0; - /* A few sanity checks. We can have memory slots which have to be - located/ended at a segment boundary (1MB). The memory in userland is - ok to be fragmented into various different vmas. It is okay to mmap() - and munmap() stuff in this slot after doing this call at any time */ - - if (new->userspace_addr & 0xffffful) - return -EINVAL; - - size = new->npages * PAGE_SIZE; - if (size & 0xffffful) - return -EINVAL; - - if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit) - return -EINVAL; + /* + * Turn off migration mode when: + * - userspace creates a new memslot with dirty logging off, + * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and + * dirty logging is turned off. + * Migration mode expects dirty page logging being enabled to store + * its dirty bitmap. + */ + if (change != KVM_MR_DELETE && + !(new->flags & KVM_MEM_LOG_DIRTY_PAGES)) + WARN(kvm_s390_vm_stop_migration(kvm), + "Failed to stop migration mode"); return 0; } From d7b9dc14031b7f8865aeedc90d8bc68a4bb16023 Mon Sep 17 00:00:00 2001 From: Nina Schoetterl-Glausch Date: Fri, 27 Jan 2023 18:45:52 +0100 Subject: [PATCH 10/26] KVM: selftests: Compile s390 tests with -march=z10 The guest used in s390 kvm selftests is not be set up to handle all instructions the compiler might emit, i.e. vector instructions, leading to crashes. Limit what the compiler emits to the oldest machine model currently supported by Linux. Signed-off-by: Nina Schoetterl-Glausch Reviewed-by: Janosch Frank Reviewed-by: Thomas Huth Link: https://lore.kernel.org/r/20230127174552.3370169-1-nsg@linux.ibm.com Message-Id: <20230127174552.3370169-1-nsg@linux.ibm.com> Signed-off-by: Janosch Frank --- tools/testing/selftests/kvm/Makefile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 1750f91dd936..df0989949eb5 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -200,6 +200,9 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \ -I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \ -I$( Date: Mon, 6 Feb 2023 17:45:49 +0100 Subject: [PATCH 11/26] KVM: s390: selftest: memop: Pass mop_desc via pointer The struct is quite large, so this seems nicer. 
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Thomas Huth Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20230206164602.138068-2-scgl@linux.ibm.com Message-Id: <20230206164602.138068-2-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- tools/testing/selftests/kvm/s390x/memop.c | 44 +++++++++++------------ 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 3fd81e58f40c..9c05d1205114 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -48,53 +48,53 @@ struct mop_desc { uint8_t key; }; -static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc desc) +static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) { struct kvm_s390_mem_op ksmo = { - .gaddr = (uintptr_t)desc.gaddr, - .size = desc.size, - .buf = ((uintptr_t)desc.buf), + .gaddr = (uintptr_t)desc->gaddr, + .size = desc->size, + .buf = ((uintptr_t)desc->buf), .reserved = "ignored_ignored_ignored_ignored" }; - switch (desc.target) { + switch (desc->target) { case LOGICAL: - if (desc.mode == READ) + if (desc->mode == READ) ksmo.op = KVM_S390_MEMOP_LOGICAL_READ; - if (desc.mode == WRITE) + if (desc->mode == WRITE) ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE; break; case SIDA: - if (desc.mode == READ) + if (desc->mode == READ) ksmo.op = KVM_S390_MEMOP_SIDA_READ; - if (desc.mode == WRITE) + if (desc->mode == WRITE) ksmo.op = KVM_S390_MEMOP_SIDA_WRITE; break; case ABSOLUTE: - if (desc.mode == READ) + if (desc->mode == READ) ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ; - if (desc.mode == WRITE) + if (desc->mode == WRITE) ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE; break; case INVALID: ksmo.op = -1; } - if (desc.f_check) + if (desc->f_check) ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY; - if (desc.f_inject) + if (desc->f_inject) ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION; - if (desc._set_flags) - ksmo.flags = desc.set_flags; - if (desc.f_key) { + if (desc->_set_flags) + ksmo.flags = desc->set_flags; + if (desc->f_key) { ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; - ksmo.key = desc.key; + ksmo.key = desc->key; } - if (desc._ar) - ksmo.ar = desc.ar; + if (desc->_ar) + ksmo.ar = desc->ar; else ksmo.ar = 0; - if (desc._sida_offset) - ksmo.sida_offset = desc.sida_offset; + if (desc->_sida_offset) + ksmo.sida_offset = desc->sida_offset; return ksmo; } @@ -183,7 +183,7 @@ static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) else \ __desc.gaddr = __desc.gaddr_v; \ } \ - __ksmo = ksmo_from_desc(__desc); \ + __ksmo = ksmo_from_desc(&__desc); \ print_memop(__info.vcpu, &__ksmo); \ err##memop_ioctl(__info, &__ksmo); \ }) From 12d27074193bda046122b6f94bacba818e4fd6c6 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:45:50 +0100 Subject: [PATCH 12/26] KVM: s390: selftest: memop: Replace macros by functions Replace the DEFAULT_* test helpers by functions, as they don't need the extra flexibility. 
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Thomas Huth Acked-by: Janosch Frank Link: https://lore.kernel.org/r/20230206164602.138068-3-scgl@linux.ibm.com Message-Id: <20230206164602.138068-3-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- tools/testing/selftests/kvm/s390x/memop.c | 82 +++++++++++------------ 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 9c05d1205114..df1c726294b2 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -48,6 +48,8 @@ struct mop_desc { uint8_t key; }; +const uint8_t NO_KEY = 0xff; + static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) { struct kvm_s390_mem_op ksmo = { @@ -85,7 +87,7 @@ static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION; if (desc->_set_flags) ksmo.flags = desc->set_flags; - if (desc->f_key) { + if (desc->f_key && desc->key != NO_KEY) { ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION; ksmo.key = desc->key; } @@ -268,34 +270,28 @@ static void prepare_mem12(void) #define ASSERT_MEM_EQ(p1, p2, size) \ TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!") -#define DEFAULT_WRITE_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) \ -({ \ - struct test_info __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ - enum mop_target __target = (mop_target_p); \ - uint32_t __size = (size); \ - \ - prepare_mem12(); \ - CHECK_N_DO(MOP, __mop_cpu, __target, WRITE, mem1, __size, \ - GADDR_V(mem1), ##__VA_ARGS__); \ - HOST_SYNC(__copy_cpu, STAGE_COPIED); \ - CHECK_N_DO(MOP, __mop_cpu, __target, READ, mem2, __size, \ - GADDR_V(mem2), ##__VA_ARGS__); \ - ASSERT_MEM_EQ(mem1, mem2, __size); \ -}) +static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu, + enum mop_target mop_target, uint32_t size, uint8_t key) +{ + prepare_mem12(); + CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, + GADDR_V(mem1), KEY(key)); + HOST_SYNC(copy_cpu, STAGE_COPIED); + CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size, + GADDR_V(mem2), KEY(key)); + ASSERT_MEM_EQ(mem1, mem2, size); +} -#define DEFAULT_READ(copy_cpu, mop_cpu, mop_target_p, size, ...) 
\ -({ \ - struct test_info __copy_cpu = (copy_cpu), __mop_cpu = (mop_cpu); \ - enum mop_target __target = (mop_target_p); \ - uint32_t __size = (size); \ - \ - prepare_mem12(); \ - CHECK_N_DO(MOP, __mop_cpu, __target, WRITE, mem1, __size, \ - GADDR_V(mem1)); \ - HOST_SYNC(__copy_cpu, STAGE_COPIED); \ - CHECK_N_DO(MOP, __mop_cpu, __target, READ, mem2, __size, ##__VA_ARGS__);\ - ASSERT_MEM_EQ(mem1, mem2, __size); \ -}) +static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, + enum mop_target mop_target, uint32_t size, uint8_t key) +{ + prepare_mem12(); + CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1)); + HOST_SYNC(copy_cpu, STAGE_COPIED); + CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size, + GADDR_V(mem2), KEY(key)); + ASSERT_MEM_EQ(mem1, mem2, size); +} static void guest_copy(void) { @@ -310,7 +306,7 @@ static void test_copy(void) HOST_SYNC(t.vcpu, STAGE_INITED); - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size); + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY); kvm_vm_free(t.kvm_vm); } @@ -357,26 +353,26 @@ static void test_copy_key(void) HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); /* vm, no key */ - DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size); + default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY); /* vm/vcpu, machting key or key 0 */ - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(0)); - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(9)); - DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size, KEY(0)); - DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, t.size, KEY(9)); + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0); + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9); + default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0); + default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9); /* * There used to be different code paths for key handling depending on * if the region crossed a page boundary. * There currently are not, but the more tests the merrier. 
*/ - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, 1, KEY(0)); - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, 1, KEY(9)); - DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, 1, KEY(0)); - DEFAULT_WRITE_READ(t.vcpu, t.vm, ABSOLUTE, 1, KEY(9)); + default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0); + default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9); + default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0); + default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9); /* vm/vcpu, mismatching keys on read, but no fetch protection */ - DEFAULT_READ(t.vcpu, t.vcpu, LOGICAL, t.size, GADDR_V(mem2), KEY(2)); - DEFAULT_READ(t.vcpu, t.vm, ABSOLUTE, t.size, GADDR_V(mem1), KEY(2)); + default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2); + default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2); kvm_vm_free(t.kvm_vm); } @@ -409,7 +405,7 @@ static void test_copy_key_storage_prot_override(void) HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); /* vcpu, mismatching keys, storage protection override in effect */ - DEFAULT_WRITE_READ(t.vcpu, t.vcpu, LOGICAL, t.size, KEY(2)); + default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2); kvm_vm_free(t.kvm_vm); } @@ -422,8 +418,8 @@ static void test_copy_key_fetch_prot(void) HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); /* vm/vcpu, matching key, fetch protection in effect */ - DEFAULT_READ(t.vcpu, t.vcpu, LOGICAL, t.size, GADDR_V(mem2), KEY(9)); - DEFAULT_READ(t.vcpu, t.vm, ABSOLUTE, t.size, GADDR_V(mem2), KEY(9)); + default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9); + default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9); kvm_vm_free(t.kvm_vm); } From de14e014a7c6a69b31b68374e6fe30f8da683505 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:45:51 +0100 Subject: [PATCH 13/26] KVM: s390: selftest: memop: Move testlist into main This allows checking if the necessary requirements for a test case are met via an arbitrary expression. In particular, it is easy to check if certain bits are set in the memop extension capability. 
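For example, a future entry could gate on a specific bit of the extension capability instead of a plain level comparison (hypothetical entry, the test function name is made up):

    {
            .name = "copy with cmpxchg",
            .test = test_cmpxchg,                    /* hypothetical */
            .requirements_met = extension_cap & 0x2, /* a specific extension bit */
    },
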
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Thomas Huth Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20230206164602.138068-4-scgl@linux.ibm.com Message-Id: <20230206164602.138068-4-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- tools/testing/selftests/kvm/s390x/memop.c | 131 +++++++++++----------- 1 file changed, 66 insertions(+), 65 deletions(-) diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index df1c726294b2..bbc191a13760 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -690,85 +690,86 @@ static void test_errors(void) kvm_vm_free(t.kvm_vm); } -struct testdef { - const char *name; - void (*test)(void); - int extension; -} testlist[] = { - { - .name = "simple copy", - .test = test_copy, - }, - { - .name = "generic error checks", - .test = test_errors, - }, - { - .name = "copy with storage keys", - .test = test_copy_key, - .extension = 1, - }, - { - .name = "copy with key storage protection override", - .test = test_copy_key_storage_prot_override, - .extension = 1, - }, - { - .name = "copy with key fetch protection", - .test = test_copy_key_fetch_prot, - .extension = 1, - }, - { - .name = "copy with key fetch protection override", - .test = test_copy_key_fetch_prot_override, - .extension = 1, - }, - { - .name = "error checks with key", - .test = test_errors_key, - .extension = 1, - }, - { - .name = "termination", - .test = test_termination, - .extension = 1, - }, - { - .name = "error checks with key storage protection override", - .test = test_errors_key_storage_prot_override, - .extension = 1, - }, - { - .name = "error checks without key fetch prot override", - .test = test_errors_key_fetch_prot_override_not_enabled, - .extension = 1, - }, - { - .name = "error checks with key fetch prot override", - .test = test_errors_key_fetch_prot_override_enabled, - .extension = 1, - }, -}; int main(int argc, char *argv[]) { int extension_cap, idx; TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP)); + extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION); + + struct testdef { + const char *name; + void (*test)(void); + bool requirements_met; + } testlist[] = { + { + .name = "simple copy", + .test = test_copy, + .requirements_met = true, + }, + { + .name = "generic error checks", + .test = test_errors, + .requirements_met = true, + }, + { + .name = "copy with storage keys", + .test = test_copy_key, + .requirements_met = extension_cap > 0, + }, + { + .name = "copy with key storage protection override", + .test = test_copy_key_storage_prot_override, + .requirements_met = extension_cap > 0, + }, + { + .name = "copy with key fetch protection", + .test = test_copy_key_fetch_prot, + .requirements_met = extension_cap > 0, + }, + { + .name = "copy with key fetch protection override", + .test = test_copy_key_fetch_prot_override, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks with key", + .test = test_errors_key, + .requirements_met = extension_cap > 0, + }, + { + .name = "termination", + .test = test_termination, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks with key storage protection override", + .test = test_errors_key_storage_prot_override, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks without key fetch prot override", + .test = test_errors_key_fetch_prot_override_not_enabled, + .requirements_met = extension_cap > 0, + }, + { + .name = "error checks with key fetch prot override", + .test 
= test_errors_key_fetch_prot_override_enabled, + .requirements_met = extension_cap > 0, + }, + }; ksft_print_header(); - ksft_set_plan(ARRAY_SIZE(testlist)); - extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION); for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) { - if (extension_cap >= testlist[idx].extension) { + if (testlist[idx].requirements_met) { testlist[idx].test(); ksft_test_result_pass("%s\n", testlist[idx].name); } else { - ksft_test_result_skip("%s - extension level %d not supported\n", - testlist[idx].name, - testlist[idx].extension); + ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n", + testlist[idx].name, extension_cap); } } From 06e5da81c66cd688d12c0c38ef35dd7c1c1845e5 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:45:52 +0100 Subject: [PATCH 14/26] KVM: s390: selftest: memop: Add bad address test Add a test that tries a real write to a bad address. The existing CHECK_ONLY test doesn't cover all paths. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Nico Boehr Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20230206164602.138068-5-scgl@linux.ibm.com Message-Id: <20230206164602.138068-5-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- tools/testing/selftests/kvm/s390x/memop.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index bbc191a13760..00737cceacda 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -641,7 +641,9 @@ static void _test_errors_common(struct test_info info, enum mop_target target, i /* Bad guest address: */ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY); - TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access"); + TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY"); + rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL)); + TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write"); /* Bad host address: */ rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1)); From 76a2ee43ed9a34f163d0331e3ba320c34c66c64e Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:45:53 +0100 Subject: [PATCH 15/26] KVM: s390: selftest: memop: Fix typo "acceeded" isn't a word, should be "exceeded". 
Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Thomas Huth Reviewed-by: Nico Boehr Link: https://lore.kernel.org/r/20230206164602.138068-6-scgl@linux.ibm.com Message-Id: <20230206164602.138068-6-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- tools/testing/selftests/kvm/s390x/memop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 00737cceacda..033a8603a096 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -602,7 +602,7 @@ static void test_errors_key_fetch_prot_override_enabled(void) /* * vcpu, mismatching keys on fetch, - * fetch protection override does not apply because memory range acceeded + * fetch protection override does not apply because memory range exceeded */ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2)); CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1, From 12c12a9924b475d6a20760f992fbf5e080747ae3 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:45:54 +0100 Subject: [PATCH 16/26] KVM: s390: selftest: memop: Fix wrong address being used in test The guest code sets the key for mem1 only. In order to provoke a protection exception the test codes needs to address mem1. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Nico Boehr Link: https://lore.kernel.org/r/20230206164602.138068-7-scgl@linux.ibm.com Message-Id: <20230206164602.138068-7-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- tools/testing/selftests/kvm/s390x/memop.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 033a8603a096..1ae5c01f9904 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -450,9 +450,9 @@ static void test_errors_key(void) /* vm/vcpu, mismatching keys, fetch protection in effect */ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); - CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2)); CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2)); - CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2)); + CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2)); kvm_vm_free(t.kvm_vm); } From dc55ceaef616bdac73810d6588c23a5b50d24911 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:45:55 +0100 Subject: [PATCH 17/26] KVM: s390: selftest: memop: Fix integer literal The address is a 64 bit value, specifying a 32 bit value can crash the guest. In this case things worked out with -O2 but not -O0. 
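The underlying issue, sketched for illustration (this explanation is not part of the patch): with a 32 bit operand the compiler only guarantees the low 32 bits of the register, so the upper half of the address register passed to sske is undefined and may contain junk depending on optimization level.

    int bad = 0;    /* only the low 32 bits of the register are defined */
    long good = 0;  /* the full 64 bit register is defined */

    asm volatile("sske %[key],%[addr]" :: [addr] "r"(bad),  [key] "r"(0x18) : "cc");
    asm volatile("sske %[key],%[addr]" :: [addr] "r"(good), [key] "r"(0x18) : "cc");
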
Signed-off-by: Janis Schoetterl-Glausch Fixes: 1bb873495a9e ("KVM: s390: selftests: Add more copy memop tests") Reviewed-by: Thomas Huth Link: https://lore.kernel.org/r/20230206164602.138068-8-scgl@linux.ibm.com Message-Id: <20230206164602.138068-8-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- tools/testing/selftests/kvm/s390x/memop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index 1ae5c01f9904..c5fec84ef3c2 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -514,7 +514,7 @@ static void guest_copy_key_fetch_prot_override(void) GUEST_SYNC(STAGE_INITED); set_storage_key_range(0, PAGE_SIZE, 0x18); set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0); - asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0), [key] "r"(0x18) : "cc"); + asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc"); GUEST_SYNC(STAGE_SKEYS_SET); for (;;) { From a41f505e9f7f4e13afb6f0e331f99360a3088af7 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:45:56 +0100 Subject: [PATCH 18/26] KVM: s390: Move common code of mem_op functions into function The vcpu and vm mem_op ioctl implementations share some functionality. Move argument checking into a function and call it from both implementations. This allows code reuse in case of additional future mem_op operations. Suggested-by: Janosch Frank Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20230206164602.138068-9-scgl@linux.ibm.com Message-Id: <20230206164602.138068-9-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 52 +++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 28 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index cb72f9a09fb3..9645015f5921 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2764,24 +2764,32 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) return r; } -static bool access_key_invalid(u8 access_key) +static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags) { - return access_key > 0xf; + if (mop->flags & ~supported_flags || !mop->size) + return -EINVAL; + if (mop->size > MEM_OP_MAX_SIZE) + return -E2BIG; + if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { + if (mop->key > 0xf) + return -EINVAL; + } else { + mop->key = 0; + } + return 0; } static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; - u64 supported_flags; void *tmpbuf = NULL; int r, srcu_idx; - supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION - | KVM_S390_MEMOP_F_CHECK_ONLY; - if (mop->flags & ~supported_flags || !mop->size) - return -EINVAL; - if (mop->size > MEM_OP_MAX_SIZE) - return -E2BIG; + r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION | + KVM_S390_MEMOP_F_CHECK_ONLY); + if (r) + return r; + /* * This is technically a heuristic only, if the kvm->lock is not * taken, it is not guaranteed that the vm is/remains non-protected. 
@@ -2793,12 +2801,6 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) */ if (kvm_s390_pv_get_handle(kvm)) return -EINVAL; - if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { - if (access_key_invalid(mop->key)) - return -EINVAL; - } else { - mop->key = 0; - } if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) @@ -5250,23 +5252,17 @@ static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, { void __user *uaddr = (void __user *)mop->buf; void *tmpbuf = NULL; - int r = 0; - const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION - | KVM_S390_MEMOP_F_CHECK_ONLY - | KVM_S390_MEMOP_F_SKEY_PROTECTION; + int r; - if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size) + r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION | + KVM_S390_MEMOP_F_CHECK_ONLY | + KVM_S390_MEMOP_F_SKEY_PROTECTION); + if (r) + return r; + if (mop->ar >= NUM_ACRS) return -EINVAL; - if (mop->size > MEM_OP_MAX_SIZE) - return -E2BIG; if (kvm_s390_pv_cpu_is_protected(vcpu)) return -EINVAL; - if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) { - if (access_key_invalid(mop->key)) - return -EINVAL; - } else { - mop->key = 0; - } if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) From 8550bcb754bc9ee0f8906c416e9e900e4ed5607c Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:45:57 +0100 Subject: [PATCH 19/26] KVM: s390: Dispatch to implementing function at top level of vm mem_op Instead of having one function covering all mem_op operations, have a function implementing absolute access and dispatch to that function in its caller, based on the operation code. This way additional future operations can be implemented by adding an implementing function without changing existing operations. Suggested-by: Janosch Frank Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Thomas Huth Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20230206164602.138068-10-scgl@linux.ibm.com Message-Id: <20230206164602.138068-10-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 9645015f5921..0053b1fbfc76 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2779,7 +2779,7 @@ static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_fla return 0; } -static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) +static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; void *tmpbuf = NULL; @@ -2790,17 +2790,6 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) if (r) return r; - /* - * This is technically a heuristic only, if the kvm->lock is not - * taken, it is not guaranteed that the vm is/remains non-protected. - * This is ok from a kernel perspective, wrongdoing is detected - * on the access, -EFAULT is returned and the vm may crash the - * next time it accesses the memory in question. - * There is no sane usecase to do switching and a memop on two - * different CPUs at the same time. 
- */ - if (kvm_s390_pv_get_handle(kvm)) - return -EINVAL; if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) { tmpbuf = vmalloc(mop->size); if (!tmpbuf) @@ -2841,8 +2830,6 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) } break; } - default: - r = -EINVAL; } out_unlock: @@ -2852,6 +2839,29 @@ out_unlock: return r; } +static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) +{ + /* + * This is technically a heuristic only, if the kvm->lock is not + * taken, it is not guaranteed that the vm is/remains non-protected. + * This is ok from a kernel perspective, wrongdoing is detected + * on the access, -EFAULT is returned and the vm may crash the + * next time it accesses the memory in question. + * There is no sane usecase to do switching and a memop on two + * different CPUs at the same time. + */ + if (kvm_s390_pv_get_handle(kvm)) + return -EINVAL; + + switch (mop->op) { + case KVM_S390_MEMOP_ABSOLUTE_READ: + case KVM_S390_MEMOP_ABSOLUTE_WRITE: + return kvm_s390_vm_mem_op_abs(kvm, mop); + default: + return -EINVAL; + } +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { From 0d6d4d23955c8aaa69f361897ec1aad67bf93653 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:45:58 +0100 Subject: [PATCH 20/26] KVM: s390: Refactor absolute vm mem_op function Remove code duplication with regards to the CHECK_ONLY flag. Decrease the number of indents. No functional change indented. Suggested-by: Janosch Frank Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Thomas Huth Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20230206164602.138068-11-scgl@linux.ibm.com Message-Id: <20230206164602.138068-11-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 43 +++++++++++++++++----------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0053b1fbfc76..23f50437a328 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2782,6 +2782,7 @@ static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_fla static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; + enum gacc_mode acc_mode; void *tmpbuf = NULL; int r, srcu_idx; @@ -2803,33 +2804,25 @@ static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop) goto out_unlock; } - switch (mop->op) { - case KVM_S390_MEMOP_ABSOLUTE_READ: { - if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key); - } else { - r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, - mop->size, GACC_FETCH, mop->key); - if (r == 0) { - if (copy_to_user(uaddr, tmpbuf, mop->size)) - r = -EFAULT; - } - } - break; + acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? 
GACC_FETCH : GACC_STORE; + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key); + goto out_unlock; } - case KVM_S390_MEMOP_ABSOLUTE_WRITE: { - if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key); - } else { - if (copy_from_user(tmpbuf, uaddr, mop->size)) { - r = -EFAULT; - break; - } - r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, - mop->size, GACC_STORE, mop->key); + if (acc_mode == GACC_FETCH) { + r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, + mop->size, GACC_FETCH, mop->key); + if (r) + goto out_unlock; + if (copy_to_user(uaddr, tmpbuf, mop->size)) + r = -EFAULT; + } else { + if (copy_from_user(tmpbuf, uaddr, mop->size)) { + r = -EFAULT; + goto out_unlock; } - break; - } + r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf, + mop->size, GACC_STORE, mop->key); } out_unlock: From 701422b3438240a98000e0db3ef68d4db7991918 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:45:59 +0100 Subject: [PATCH 21/26] KVM: s390: Refactor vcpu mem_op function Remove code duplication with regards to the CHECK_ONLY flag. Decrease the number of indents. No functional change indented. Suggested-by: Janosch Frank Signed-off-by: Janis Schoetterl-Glausch Link: https://lore.kernel.org/r/20230206164602.138068-12-scgl@linux.ibm.com Message-Id: <20230206164602.138068-12-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 23f50437a328..17368d118653 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -5254,6 +5254,7 @@ static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; + enum gacc_mode acc_mode; void *tmpbuf = NULL; int r; @@ -5272,38 +5273,35 @@ static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu, return -ENOMEM; } - switch (mop->op) { - case KVM_S390_MEMOP_LOGICAL_READ: - if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, - GACC_FETCH, mop->key); - break; - } + acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? 
GACC_FETCH : GACC_STORE; + if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { + r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, + acc_mode, mop->key); + goto out_inject; + } + if (acc_mode == GACC_FETCH) { r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size, mop->key); - if (r == 0) { - if (copy_to_user(uaddr, tmpbuf, mop->size)) - r = -EFAULT; - } - break; - case KVM_S390_MEMOP_LOGICAL_WRITE: - if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) { - r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, - GACC_STORE, mop->key); - break; + if (r) + goto out_inject; + if (copy_to_user(uaddr, tmpbuf, mop->size)) { + r = -EFAULT; + goto out_free; } + } else { if (copy_from_user(tmpbuf, uaddr, mop->size)) { r = -EFAULT; - break; + goto out_free; } r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size, mop->key); - break; } +out_inject: if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0) kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm); +out_free: vfree(tmpbuf); return r; } From 3fd49805d19d1c566e92358d2e3d9fadb3b5ec16 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:46:00 +0100 Subject: [PATCH 22/26] KVM: s390: Extend MEM_OP ioctl by storage key checked cmpxchg User space can use the MEM_OP ioctl to make storage key checked reads and writes to the guest, however, it has no way of performing atomic, key checked, accesses to the guest. Extend the MEM_OP ioctl in order to allow for this, by adding a cmpxchg op. For now, support this op for absolute accesses only. This op can be used, for example, to set the device-state-change indicator and the adapter-local-summary indicator atomically. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20230206164602.138068-13-scgl@linux.ibm.com Message-Id: <20230206164602.138068-13-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/gaccess.c | 109 +++++++++++++++++++++++++++++++++++++++ arch/s390/kvm/gaccess.h | 3 ++ arch/s390/kvm/kvm-s390.c | 56 +++++++++++++++++++- include/uapi/linux/kvm.h | 8 +++ 4 files changed, 175 insertions(+), 1 deletion(-) diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 0243b6e38d36..3eb85f254881 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -1161,6 +1161,115 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, return rc; } +/** + * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address. + * @kvm: Virtual machine instance. + * @gpa: Absolute guest address of the location to be changed. + * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a + * non power of two will result in failure. + * @old_addr: Pointer to old value. If the location at @gpa contains this value, + * the exchange will succeed. After calling cmpxchg_guest_abs_with_key() + * *@old_addr contains the value at @gpa before the attempt to + * exchange the value. + * @new: The value to place at @gpa. + * @access_key: The access key to use for the guest access. + * @success: output value indicating if an exchange occurred. + * + * Atomically exchange the value at @gpa by @new, if it contains *@old. + * Honors storage keys. 
+ * + * Return: * 0: successful exchange + * * >0: a program interruption code indicating the reason cmpxchg could + * not be attempted + * * -EINVAL: address misaligned or len not power of two + * * -EAGAIN: transient failure (len 1 or 2) + * * -EOPNOTSUPP: read-only memslot (should never occur) + */ +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, + __uint128_t *old_addr, __uint128_t new, + u8 access_key, bool *success) +{ + gfn_t gfn = gpa_to_gfn(gpa); + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + bool writable; + hva_t hva; + int ret; + + if (!IS_ALIGNED(gpa, len)) + return -EINVAL; + + hva = gfn_to_hva_memslot_prot(slot, gfn, &writable); + if (kvm_is_error_hva(hva)) + return PGM_ADDRESSING; + /* + * Check if it's a read-only memslot, even though that cannot occur + * since those are unsupported. + * Don't try to actually handle that case. + */ + if (!writable) + return -EOPNOTSUPP; + + hva += offset_in_page(gpa); + /* + * The cmpxchg_user_key macro depends on the type of "old", so we need + * a case for each valid length and get some code duplication as long + * as we don't introduce a new macro. + */ + switch (len) { + case 1: { + u8 old; + + ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 2: { + u16 old; + + ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 4: { + u32 old; + + ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 8: { + u64 old; + + ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + case 16: { + __uint128_t old; + + ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key); + *success = !ret && old == *old_addr; + *old_addr = old; + break; + } + default: + return -EINVAL; + } + if (*success) + mark_page_dirty_in_slot(kvm, slot, gfn); + /* + * Assume that the fault is caused by protection, either key protection + * or user page write protection. 
+ */ + if (ret == -EFAULT) + ret = PGM_PROTECTION; + return ret; +} + /** * guest_translate_address_with_key - translate guest logical into guest absolute address * @vcpu: virtual cpu diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 9408d6cc8e2c..b320d12aa049 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -206,6 +206,9 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, void *data, unsigned long len, enum gacc_mode mode); +int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, __uint128_t *old, + __uint128_t new, u8 access_key, bool *success); + /** * write_guest_with_key - copy data from kernel space to guest space * @vcpu: virtual cpu diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 17368d118653..8dfda720f60d 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -584,7 +584,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_VCPU_RESETS: case KVM_CAP_SET_GUEST_DEBUG: case KVM_CAP_S390_DIAG318: - case KVM_CAP_S390_MEM_OP_EXTENSION: r = 1; break; case KVM_CAP_SET_GUEST_DEBUG2: @@ -598,6 +597,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_MEM_OP: r = MEM_OP_MAX_SIZE; break; + case KVM_CAP_S390_MEM_OP_EXTENSION: + /* + * Flag bits indicating which extensions are supported. + * If r > 0, the base extension must also be supported/indicated, + * in order to maintain backwards compatibility. + */ + r = KVM_S390_MEMOP_EXTENSION_CAP_BASE | + KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG; + break; case KVM_CAP_NR_VCPUS: case KVM_CAP_MAX_VCPUS: case KVM_CAP_MAX_VCPU_ID: @@ -2832,6 +2840,50 @@ out_unlock: return r; } +static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop) +{ + void __user *uaddr = (void __user *)mop->buf; + void __user *old_addr = (void __user *)mop->old_addr; + union { + __uint128_t quad; + char raw[sizeof(__uint128_t)]; + } old = { .quad = 0}, new = { .quad = 0 }; + unsigned int off_in_quad = sizeof(new) - mop->size; + int r, srcu_idx; + bool success; + + r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION); + if (r) + return r; + /* + * This validates off_in_quad. 
Checking that size is a power + * of two is not necessary, as cmpxchg_guest_abs_with_key + * takes care of that + */ + if (mop->size > sizeof(new)) + return -EINVAL; + if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size)) + return -EFAULT; + if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size)) + return -EFAULT; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + if (kvm_is_error_gpa(kvm, mop->gaddr)) { + r = PGM_ADDRESSING; + goto out_unlock; + } + + r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad, + new.quad, mop->key, &success); + if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size)) + r = -EFAULT; + +out_unlock: + srcu_read_unlock(&kvm->srcu, srcu_idx); + return r; +} + static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) { /* @@ -2850,6 +2902,8 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop) case KVM_S390_MEMOP_ABSOLUTE_READ: case KVM_S390_MEMOP_ABSOLUTE_WRITE: return kvm_s390_vm_mem_op_abs(kvm, mop); + case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG: + return kvm_s390_vm_mem_op_cmpxchg(kvm, mop); default: return -EINVAL; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 55155e262646..d2f30463c133 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -583,6 +583,8 @@ struct kvm_s390_mem_op { struct { __u8 ar; /* the access register number */ __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_addr; /* ignored if cmpxchg flag unset */ }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* ignored */ @@ -595,11 +597,17 @@ struct kvm_s390_mem_op { #define KVM_S390_MEMOP_SIDA_WRITE 3 #define KVM_S390_MEMOP_ABSOLUTE_READ 4 #define KVM_S390_MEMOP_ABSOLUTE_WRITE 5 +#define KVM_S390_MEMOP_ABSOLUTE_CMPXCHG 6 + /* flags for kvm_s390_mem_op->flags */ #define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0) #define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1) #define KVM_S390_MEMOP_F_SKEY_PROTECTION (1ULL << 2) +/* flags specifying extension support via KVM_CAP_S390_MEM_OP_EXTENSION */ +#define KVM_S390_MEMOP_EXTENSION_CAP_BASE (1 << 0) +#define KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG (1 << 1) + /* for KVM_INTERRUPT */ struct kvm_interrupt { /* in */ From a7b041732802db0dab0a7740a6c8338b803bf933 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Mon, 6 Feb 2023 17:46:01 +0100 Subject: [PATCH 23/26] Documentation: KVM: s390: Describe KVM_S390_MEMOP_F_CMPXCHG Describe the semantics of the new KVM_S390_MEMOP_F_CMPXCHG flag for absolute vm write memops which allows user space to perform (storage key checked) cmpxchg operations on guest memory. Signed-off-by: Janis Schoetterl-Glausch Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20230206164602.138068-14-scgl@linux.ibm.com Message-Id: <20230206164602.138068-14-scgl@linux.ibm.com> [frankja@de.ibm.com: Removed a line from an earlier version] Signed-off-by: Janosch Frank --- Documentation/virt/kvm/api.rst | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 8cd7fd05d53b..a4c9dbccc7c2 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -3728,7 +3728,7 @@ The fields in each entry are defined as follows: :Parameters: struct kvm_s390_mem_op (in) :Returns: = 0 on success, < 0 on generic error (e.g. 
-EFAULT or -ENOMEM), - > 0 if an exception occurred while walking the page tables + 16 bit program exception code if the access causes such an exception Read or write data from/to the VM's memory. The KVM_CAP_S390_MEM_OP_EXTENSION capability specifies what functionality is @@ -3746,6 +3746,8 @@ Parameters are specified via the following structure:: struct { __u8 ar; /* the access register number */ __u8 key; /* access key, ignored if flag unset */ + __u8 pad1[6]; /* ignored */ + __u64 old_addr; /* ignored if flag unset */ }; __u32 sida_offset; /* offset into the sida */ __u8 reserved[32]; /* ignored */ @@ -3773,6 +3775,7 @@ Possible operations are: * ``KVM_S390_MEMOP_ABSOLUTE_WRITE`` * ``KVM_S390_MEMOP_SIDA_READ`` * ``KVM_S390_MEMOP_SIDA_WRITE`` + * ``KVM_S390_MEMOP_ABSOLUTE_CMPXCHG`` Logical read/write: ^^^^^^^^^^^^^^^^^^^ @@ -3821,7 +3824,7 @@ the checks required for storage key protection as one operation (as opposed to user space getting the storage keys, performing the checks, and accessing memory thereafter, which could lead to a delay between check and access). Absolute accesses are permitted for the VM ioctl if KVM_CAP_S390_MEM_OP_EXTENSION -is > 0. +has the KVM_S390_MEMOP_EXTENSION_CAP_BASE bit set. Currently absolute accesses are not permitted for VCPU ioctls. Absolute accesses are permitted for non-protected guests only. @@ -3829,7 +3832,26 @@ Supported flags: * ``KVM_S390_MEMOP_F_CHECK_ONLY`` * ``KVM_S390_MEMOP_F_SKEY_PROTECTION`` -The semantics of the flags are as for logical accesses. +The semantics of the flags common with logical accesses are as for logical +accesses. + +Absolute cmpxchg: +^^^^^^^^^^^^^^^^^ + +Perform cmpxchg on absolute guest memory. Intended for use with the +KVM_S390_MEMOP_F_SKEY_PROTECTION flag. +Instead of doing an unconditional write, the access occurs only if the target +location contains the value pointed to by "old_addr". +This is performed as an atomic cmpxchg with the length specified by the "size" +parameter. "size" must be a power of two up to and including 16. +If the exchange did not take place because the target value doesn't match the +old value, the value "old_addr" points to is replaced by the target value. +User space can tell if an exchange took place by checking if this replacement +occurred. The cmpxchg op is permitted for the VM ioctl if +KVM_CAP_S390_MEM_OP_EXTENSION has flag KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG set. + +Supported flags: + * ``KVM_S390_MEMOP_F_SKEY_PROTECTION`` SIDA read/write: ^^^^^^^^^^^^^^^^ From 0dd714bfd200068e3c0d6b37861056367868e612 Mon Sep 17 00:00:00 2001 From: Janis Schoetterl-Glausch Date: Tue, 7 Feb 2023 17:42:25 +0100 Subject: [PATCH 24/26] KVM: s390: selftest: memop: Add cmpxchg tests Test successful exchange, unsuccessful exchange, storage key protection and invalid arguments. 
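For reference, a hedged user-space sketch of driving the new op directly through the VM ioctl (helper name and error handling are illustrative and not taken from the selftest; assumes uapi headers that already carry the cmpxchg extension):

  #include <stdbool.h>
  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <linux/kvm.h>

  /*
   * Key-checked 8-byte cmpxchg on absolute guest address gaddr via the VM fd.
   * Returns the raw ioctl result (0, a positive program exception code, or
   * -1 with errno set); *exchanged reports whether the store took place.
   * On a failed compare, *old is updated to the value observed at gaddr.
   */
  static int vm_cmpxchg8(int vm_fd, uint64_t gaddr, uint64_t *old,
  			uint64_t new, uint8_t key, bool *exchanged)
  {
  	uint64_t expected = *old;
  	struct kvm_s390_mem_op ksmo = {
  		.gaddr = gaddr,
  		.size = sizeof(new),
  		.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG,
  		.buf = (uintptr_t)&new,
  		.old_addr = (uintptr_t)old,
  		.flags = KVM_S390_MEMOP_F_SKEY_PROTECTION,
  		.key = key,
  	};
  	int r = ioctl(vm_fd, KVM_S390_MEM_OP, &ksmo);

  	*exchanged = !r && *old == expected;
  	return r;
  }

As the error tests in this patch check, the size must be a power of two up to 16 and the guest address size aligned, otherwise the ioctl fails with EINVAL.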
Signed-off-by: Janis Schoetterl-Glausch Acked-by: Janosch Frank Link: https://lore.kernel.org/r/20230207164225.2114706-1-scgl@linux.ibm.com Message-Id: <20230207164225.2114706-1-scgl@linux.ibm.com> Signed-off-by: Janosch Frank --- tools/testing/selftests/kvm/s390x/memop.c | 411 +++++++++++++++++++++- 1 file changed, 394 insertions(+), 17 deletions(-) diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index c5fec84ef3c2..8e4b94d7b8dd 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -9,6 +9,7 @@ #include #include #include +#include #include @@ -26,6 +27,7 @@ enum mop_target { enum mop_access_mode { READ, WRITE, + CMPXCHG, }; struct mop_desc { @@ -44,13 +46,16 @@ struct mop_desc { enum mop_access_mode mode; void *buf; uint32_t sida_offset; + void *old; + uint8_t old_value[16]; + bool *cmpxchg_success; uint8_t ar; uint8_t key; }; const uint8_t NO_KEY = 0xff; -static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) +static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc) { struct kvm_s390_mem_op ksmo = { .gaddr = (uintptr_t)desc->gaddr, @@ -77,6 +82,11 @@ static struct kvm_s390_mem_op ksmo_from_desc(const struct mop_desc *desc) ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ; if (desc->mode == WRITE) ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE; + if (desc->mode == CMPXCHG) { + ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG; + ksmo.old_addr = (uint64_t)desc->old; + memcpy(desc->old_value, desc->old, desc->size); + } break; case INVALID: ksmo.op = -1; @@ -135,9 +145,13 @@ static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksm case KVM_S390_MEMOP_ABSOLUTE_WRITE: printf("ABSOLUTE, WRITE, "); break; + case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG: + printf("ABSOLUTE, CMPXCHG, "); + break; } - printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u", - ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key); + printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx", + ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key, + ksmo->old_addr); if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY) printf(", CHECK_ONLY"); if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) @@ -147,17 +161,8 @@ static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksm puts(")"); } -static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) -{ - struct kvm_vcpu *vcpu = info.vcpu; - - if (!vcpu) - vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo); - else - vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); -} - -static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) +static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, + struct mop_desc *desc) { struct kvm_vcpu *vcpu = info.vcpu; @@ -167,6 +172,21 @@ static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo); } +static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, + struct mop_desc *desc) +{ + int r; + + r = err_memop_ioctl(info, ksmo, desc); + if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) { + if (desc->cmpxchg_success) { + int diff = memcmp(desc->old_value, desc->old, desc->size); + *desc->cmpxchg_success = !diff; + } + } + TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r)); +} + #define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) 
\ ({ \ struct test_info __info = (info_p); \ @@ -187,7 +207,7 @@ static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) } \ __ksmo = ksmo_from_desc(&__desc); \ print_memop(__info.vcpu, &__ksmo); \ - err##memop_ioctl(__info, &__ksmo); \ + err##memop_ioctl(__info, &__ksmo, &__desc); \ }) #define MOP(...) MEMOP(, __VA_ARGS__) @@ -201,6 +221,8 @@ static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) #define AR(a) ._ar = 1, .ar = (a) #define KEY(a) .f_key = 1, .key = (a) #define INJECT .f_inject = 1 +#define CMPXCHG_OLD(o) .old = (o) +#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s) #define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) @@ -210,8 +232,8 @@ static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) -static uint8_t mem1[65536]; -static uint8_t mem2[65536]; +static uint8_t __aligned(PAGE_SIZE) mem1[65536]; +static uint8_t __aligned(PAGE_SIZE) mem2[65536]; struct test_default { struct kvm_vm *kvm_vm; @@ -243,6 +265,8 @@ enum stage { STAGE_SKEYS_SET, /* Guest copied memory (locations up to test case) */ STAGE_COPIED, + /* End of guest code reached */ + STAGE_DONE, }; #define HOST_SYNC(info_p, stage) \ @@ -254,6 +278,9 @@ enum stage { \ vcpu_run(__vcpu); \ get_ucall(__vcpu, &uc); \ + if (uc.cmd == UCALL_ABORT) { \ + REPORT_GUEST_ASSERT_2(uc, "hints: %lu, %lu"); \ + } \ ASSERT_EQ(uc.cmd, UCALL_SYNC); \ ASSERT_EQ(uc.args[1], __stage); \ }) \ @@ -293,6 +320,44 @@ static void default_read(struct test_info copy_cpu, struct test_info mop_cpu, ASSERT_MEM_EQ(mem1, mem2, size); } +static void default_cmpxchg(struct test_default *test, uint8_t key) +{ + for (int size = 1; size <= 16; size *= 2) { + for (int offset = 0; offset < 16; offset += size) { + uint8_t __aligned(16) new[16] = {}; + uint8_t __aligned(16) old[16]; + bool succ; + + prepare_mem12(); + default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY); + + memcpy(&old, mem1, 16); + MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset, + size, GADDR_V(mem1 + offset), + CMPXCHG_OLD(old + offset), + CMPXCHG_SUCCESS(&succ), KEY(key)); + HOST_SYNC(test->vcpu, STAGE_COPIED); + MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2)); + TEST_ASSERT(succ, "exchange of values should succeed"); + memcpy(mem1 + offset, new + offset, size); + ASSERT_MEM_EQ(mem1, mem2, 16); + + memcpy(&old, mem1, 16); + new[offset]++; + old[offset]++; + MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset, + size, GADDR_V(mem1 + offset), + CMPXCHG_OLD(old + offset), + CMPXCHG_SUCCESS(&succ), KEY(key)); + HOST_SYNC(test->vcpu, STAGE_COPIED); + MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2)); + TEST_ASSERT(!succ, "exchange of values should not succeed"); + ASSERT_MEM_EQ(mem1, mem2, 16); + ASSERT_MEM_EQ(&old, mem1, 16); + } + } +} + static void guest_copy(void) { GUEST_SYNC(STAGE_INITED); @@ -377,6 +442,248 @@ static void test_copy_key(void) kvm_vm_free(t.kvm_vm); } +static void test_cmpxchg_key(void) +{ + struct test_default t = test_default_init(guest_copy_key); + + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + default_cmpxchg(&t, NO_KEY); + default_cmpxchg(&t, 0); + default_cmpxchg(&t, 9); + + kvm_vm_free(t.kvm_vm); +} + +static __uint128_t cut_to_size(int size, __uint128_t val) +{ + switch (size) { + case 1: + return (uint8_t)val; + case 2: + return (uint16_t)val; + case 4: + return (uint32_t)val; + case 8: + return (uint64_t)val; + case 16: + return val; + } + 
GUEST_ASSERT_1(false, "Invalid size"); + return 0; +} + +static bool popcount_eq(__uint128_t a, __uint128_t b) +{ + unsigned int count_a, count_b; + + count_a = __builtin_popcountl((uint64_t)(a >> 64)) + + __builtin_popcountl((uint64_t)a); + count_b = __builtin_popcountl((uint64_t)(b >> 64)) + + __builtin_popcountl((uint64_t)b); + return count_a == count_b; +} + +static __uint128_t rotate(int size, __uint128_t val, int amount) +{ + unsigned int bits = size * 8; + + amount = (amount + bits) % bits; + val = cut_to_size(size, val); + return (val << (bits - amount)) | (val >> amount); +} + +const unsigned int max_block = 16; + +static void choose_block(bool guest, int i, int *size, int *offset) +{ + unsigned int rand; + + rand = i; + if (guest) { + rand = rand * 19 + 11; + *size = 1 << ((rand % 3) + 2); + rand = rand * 19 + 11; + *offset = (rand % max_block) & ~(*size - 1); + } else { + rand = rand * 17 + 5; + *size = 1 << (rand % 5); + rand = rand * 17 + 5; + *offset = (rand % max_block) & ~(*size - 1); + } +} + +static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old) +{ + unsigned int rand; + int amount; + bool swap; + + rand = i; + rand = rand * 3 + 1; + if (guest) + rand = rand * 3 + 1; + swap = rand % 2 == 0; + if (swap) { + int i, j; + __uint128_t new; + uint8_t byte0, byte1; + + rand = rand * 3 + 1; + i = rand % size; + rand = rand * 3 + 1; + j = rand % size; + if (i == j) + return old; + new = rotate(16, old, i * 8); + byte0 = new & 0xff; + new &= ~0xff; + new = rotate(16, new, -i * 8); + new = rotate(16, new, j * 8); + byte1 = new & 0xff; + new = (new & ~0xff) | byte0; + new = rotate(16, new, -j * 8); + new = rotate(16, new, i * 8); + new = new | byte1; + new = rotate(16, new, -i * 8); + return new; + } + rand = rand * 3 + 1; + amount = rand % (size * 8); + return rotate(size, old, amount); +} + +static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new) +{ + bool ret; + + switch (size) { + case 4: { + uint32_t old = *old_addr; + + asm volatile ("cs %[old],%[new],%[address]" + : [old] "+d" (old), + [address] "+Q" (*(uint32_t *)(target)) + : [new] "d" ((uint32_t)new) + : "cc" + ); + ret = old == (uint32_t)*old_addr; + *old_addr = old; + return ret; + } + case 8: { + uint64_t old = *old_addr; + + asm volatile ("csg %[old],%[new],%[address]" + : [old] "+d" (old), + [address] "+Q" (*(uint64_t *)(target)) + : [new] "d" ((uint64_t)new) + : "cc" + ); + ret = old == (uint64_t)*old_addr; + *old_addr = old; + return ret; + } + case 16: { + __uint128_t old = *old_addr; + + asm volatile ("cdsg %[old],%[new],%[address]" + : [old] "+d" (old), + [address] "+Q" (*(__uint128_t *)(target)) + : [new] "d" (new) + : "cc" + ); + ret = old == *old_addr; + *old_addr = old; + return ret; + } + } + GUEST_ASSERT_1(false, "Invalid size"); + return 0; +} + +const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000; + +static void guest_cmpxchg_key(void) +{ + int size, offset; + __uint128_t old, new; + + set_storage_key_range(mem1, max_block, 0x10); + set_storage_key_range(mem2, max_block, 0x10); + GUEST_SYNC(STAGE_SKEYS_SET); + + for (int i = 0; i < cmpxchg_iter_outer; i++) { + do { + old = 1; + } while (!_cmpxchg(16, mem1, &old, 0)); + for (int j = 0; j < cmpxchg_iter_inner; j++) { + choose_block(true, i + j, &size, &offset); + do { + new = permutate_bits(true, i + j, size, old); + } while (!_cmpxchg(size, mem2 + offset, &old, new)); + } + } + + GUEST_SYNC(STAGE_DONE); +} + +static void *run_guest(void *data) +{ + struct test_info *info = data; + + 
HOST_SYNC(*info, STAGE_DONE); + return NULL; +} + +static char *quad_to_char(__uint128_t *quad, int size) +{ + return ((char *)quad) + (sizeof(*quad) - size); +} + +static void test_cmpxchg_key_concurrent(void) +{ + struct test_default t = test_default_init(guest_cmpxchg_key); + int size, offset; + __uint128_t old, new; + bool success; + pthread_t thread; + + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + prepare_mem12(); + MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2)); + pthread_create(&thread, NULL, run_guest, &t.vcpu); + + for (int i = 0; i < cmpxchg_iter_outer; i++) { + do { + old = 0; + new = 1; + MOP(t.vm, ABSOLUTE, CMPXCHG, &new, + sizeof(new), GADDR_V(mem1), + CMPXCHG_OLD(&old), + CMPXCHG_SUCCESS(&success), KEY(1)); + } while (!success); + for (int j = 0; j < cmpxchg_iter_inner; j++) { + choose_block(false, i + j, &size, &offset); + do { + new = permutate_bits(false, i + j, size, old); + MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size), + size, GADDR_V(mem2 + offset), + CMPXCHG_OLD(quad_to_char(&old, size)), + CMPXCHG_SUCCESS(&success), KEY(1)); + } while (!success); + } + } + + pthread_join(thread, NULL); + + MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2)); + TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2), + "Must retain number of set bits"); + + kvm_vm_free(t.kvm_vm); +} + static void guest_copy_key_fetch_prot(void) { /* @@ -457,6 +764,24 @@ static void test_errors_key(void) kvm_vm_free(t.kvm_vm); } +static void test_errors_cmpxchg_key(void) +{ + struct test_default t = test_default_init(guest_copy_key_fetch_prot); + int i; + + HOST_SYNC(t.vcpu, STAGE_INITED); + HOST_SYNC(t.vcpu, STAGE_SKEYS_SET); + + for (i = 1; i <= 16; i *= 2) { + __uint128_t old = 0; + + ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2), + CMPXCHG_OLD(&old), KEY(2)); + } + + kvm_vm_free(t.kvm_vm); +} + static void test_termination(void) { struct test_default t = test_default_init(guest_error_key); @@ -692,6 +1017,38 @@ static void test_errors(void) kvm_vm_free(t.kvm_vm); } +static void test_errors_cmpxchg(void) +{ + struct test_default t = test_default_init(guest_idle); + __uint128_t old; + int rv, i, power = 1; + + HOST_SYNC(t.vcpu, STAGE_INITED); + + for (i = 0; i < 32; i++) { + if (i == power) { + power *= 2; + continue; + } + rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1), + CMPXCHG_OLD(&old)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl allows bad size for cmpxchg"); + } + for (i = 1; i <= 16; i *= 2) { + rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL), + CMPXCHG_OLD(&old)); + TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg"); + } + for (i = 2; i <= 16; i *= 2) { + rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1), + CMPXCHG_OLD(&old)); + TEST_ASSERT(rv == -1 && errno == EINVAL, + "ioctl allows bad alignment for cmpxchg"); + } + + kvm_vm_free(t.kvm_vm); +} int main(int argc, char *argv[]) { @@ -720,6 +1077,16 @@ int main(int argc, char *argv[]) .test = test_copy_key, .requirements_met = extension_cap > 0, }, + { + .name = "cmpxchg with storage keys", + .test = test_cmpxchg_key, + .requirements_met = extension_cap & 0x2, + }, + { + .name = "concurrently cmpxchg with storage keys", + .test = test_cmpxchg_key_concurrent, + .requirements_met = extension_cap & 0x2, + }, { .name = "copy with key storage protection override", .test = test_copy_key_storage_prot_override, @@ -740,6 +1107,16 @@ int main(int argc, char *argv[]) .test = test_errors_key, .requirements_met = extension_cap > 
0, }, + { + .name = "error checks for cmpxchg with key", + .test = test_errors_cmpxchg_key, + .requirements_met = extension_cap & 0x2, + }, + { + .name = "error checks for cmpxchg", + .test = test_errors_cmpxchg, + .requirements_met = extension_cap & 0x2, + }, { .name = "termination", .test = test_termination, From 1abb32697a0d0a308d24fe75b1b7caf54c1f7a51 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Mon, 7 Nov 2022 09:57:27 +0100 Subject: [PATCH 25/26] KVM: s390: GISA: sort out physical vs virtual pointers usage Fix virtual vs physical address confusion (which currently are the same). In chsc_sgib(), do the virtual-physical conversion in the caller since the caller needs to make sure it is a 31-bit address and zero has a special meaning (disassociating the GIB). Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Reviewed-by: Michael Mueller Link: https://lore.kernel.org/r/20221107085727.1533792-1-nrb@linux.ibm.com Message-Id: <20221107085727.1533792-1-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/interrupt.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index 1dae78deddf2..bbb280633cb8 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3099,9 +3099,9 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer) static void process_gib_alert_list(void) { struct kvm_s390_gisa_interrupt *gi; + u32 final, gisa_phys, origin = 0UL; struct kvm_s390_gisa *gisa; struct kvm *kvm; - u32 final, origin = 0UL; do { /* @@ -3127,9 +3127,10 @@ static void process_gib_alert_list(void) * interruptions asap. */ while (origin & GISA_ADDR_MASK) { - gisa = (struct kvm_s390_gisa *)(u64)origin; + gisa_phys = origin; + gisa = phys_to_virt(gisa_phys); origin = gisa->next_alert; - gisa->next_alert = (u32)(u64)gisa; + gisa->next_alert = gisa_phys; kvm = container_of(gisa, struct sie_page2, gisa)->kvm; gi = &kvm->arch.gisa_int; if (hrtimer_active(&gi->timer)) @@ -3413,6 +3414,7 @@ void kvm_s390_gib_destroy(void) int kvm_s390_gib_init(u8 nisc) { + u32 gib_origin; int rc = 0; if (!css_general_characteristics.aiv) { @@ -3434,7 +3436,8 @@ int kvm_s390_gib_init(u8 nisc) } gib->nisc = nisc; - if (chsc_sgib((u32)(u64)gib)) { + gib_origin = virt_to_phys(gib); + if (chsc_sgib(gib_origin)) { pr_err("Associating the GIB with the AIV facility failed\n"); free_page((unsigned long)gib); gib = NULL; From 5fc5b94a273655128159186c87662105db8afeb5 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Fri, 29 Jan 2021 12:29:06 +0100 Subject: [PATCH 26/26] s390/virtio: sort out physical vs virtual pointers usage This does not fix a real bug, since virtual addresses are currently indentical to physical ones. 
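As a hedged illustration of the converted pattern (a hypothetical helper, not part of this patch): a CCW's data address is a channel (physical) address, so it should be derived via virt_to_phys() rather than by casting the kernel virtual pointer, even while the identity mapping keeps both values equal.

  /* hypothetical helper, illustration only */
  static void ccw_set_data(struct ccw1 *ccw, __u8 cmd_code, void *data, __u16 count)
  {
  	ccw->cmd_code = cmd_code;
  	ccw->flags = 0;
  	ccw->count = count;
  	/* cda is a channel (physical) address, not a kernel virtual one */
  	ccw->cda = (__u32)virt_to_phys(data);	/* was: (__u32)(unsigned long)data */
  }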
Reviewed-by: Nico Boehr Signed-off-by: Alexander Gordeev Signed-off-by: Janosch Frank --- drivers/s390/virtio/virtio_ccw.c | 46 +++++++++++++++++--------------- 1 file changed, 24 insertions(+), 22 deletions(-) diff --git a/drivers/s390/virtio/virtio_ccw.c b/drivers/s390/virtio/virtio_ccw.c index a10dbe632ef9..954fc31b4bc7 100644 --- a/drivers/s390/virtio/virtio_ccw.c +++ b/drivers/s390/virtio/virtio_ccw.c @@ -363,7 +363,7 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev, thinint_area->isc = VIRTIO_AIRQ_ISC; ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER; ccw->count = sizeof(*thinint_area); - ccw->cda = (__u32)(unsigned long) thinint_area; + ccw->cda = (__u32)virt_to_phys(thinint_area); } else { /* payload is the address of the indicators */ indicatorp = ccw_device_dma_zalloc(vcdev->cdev, @@ -373,7 +373,7 @@ static void virtio_ccw_drop_indicator(struct virtio_ccw_device *vcdev, *indicatorp = 0; ccw->cmd_code = CCW_CMD_SET_IND; ccw->count = sizeof(indicators(vcdev)); - ccw->cda = (__u32)(unsigned long) indicatorp; + ccw->cda = (__u32)virt_to_phys(indicatorp); } /* Deregister indicators from host. */ *indicators(vcdev) = 0; @@ -417,7 +417,7 @@ static int virtio_ccw_read_vq_conf(struct virtio_ccw_device *vcdev, ccw->cmd_code = CCW_CMD_READ_VQ_CONF; ccw->flags = 0; ccw->count = sizeof(struct vq_config_block); - ccw->cda = (__u32)(unsigned long)(&vcdev->dma_area->config_block); + ccw->cda = (__u32)virt_to_phys(&vcdev->dma_area->config_block); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_VQ_CONF); if (ret) return ret; @@ -454,7 +454,7 @@ static void virtio_ccw_del_vq(struct virtqueue *vq, struct ccw1 *ccw) } ccw->cmd_code = CCW_CMD_SET_VQ; ccw->flags = 0; - ccw->cda = (__u32)(unsigned long)(info->info_block); + ccw->cda = (__u32)virt_to_phys(info->info_block); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_VQ | index); /* @@ -556,7 +556,7 @@ static struct virtqueue *virtio_ccw_setup_vq(struct virtio_device *vdev, } ccw->cmd_code = CCW_CMD_SET_VQ; ccw->flags = 0; - ccw->cda = (__u32)(unsigned long)(info->info_block); + ccw->cda = (__u32)virt_to_phys(info->info_block); err = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_VQ | i); if (err) { dev_warn(&vcdev->cdev->dev, "SET_VQ failed\n"); @@ -590,6 +590,7 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev, { int ret; struct virtio_thinint_area *thinint_area = NULL; + unsigned long indicator_addr; struct airq_info *info; thinint_area = ccw_device_dma_zalloc(vcdev->cdev, @@ -599,21 +600,22 @@ static int virtio_ccw_register_adapter_ind(struct virtio_ccw_device *vcdev, goto out; } /* Try to get an indicator. 
*/ - thinint_area->indicator = get_airq_indicator(vqs, nvqs, - &thinint_area->bit_nr, - &vcdev->airq_info); - if (!thinint_area->indicator) { + indicator_addr = get_airq_indicator(vqs, nvqs, + &thinint_area->bit_nr, + &vcdev->airq_info); + if (!indicator_addr) { ret = -ENOSPC; goto out; } + thinint_area->indicator = virt_to_phys((void *)indicator_addr); info = vcdev->airq_info; thinint_area->summary_indicator = - (unsigned long) get_summary_indicator(info); + virt_to_phys(get_summary_indicator(info)); thinint_area->isc = VIRTIO_AIRQ_ISC; ccw->cmd_code = CCW_CMD_SET_IND_ADAPTER; ccw->flags = CCW_FLAG_SLI; ccw->count = sizeof(*thinint_area); - ccw->cda = (__u32)(unsigned long)thinint_area; + ccw->cda = (__u32)virt_to_phys(thinint_area); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND_ADAPTER); if (ret) { if (ret == -EOPNOTSUPP) { @@ -686,7 +688,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, ccw->cmd_code = CCW_CMD_SET_IND; ccw->flags = 0; ccw->count = sizeof(indicators(vcdev)); - ccw->cda = (__u32)(unsigned long) indicatorp; + ccw->cda = (__u32)virt_to_phys(indicatorp); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_IND); if (ret) goto out; @@ -697,7 +699,7 @@ static int virtio_ccw_find_vqs(struct virtio_device *vdev, unsigned nvqs, ccw->cmd_code = CCW_CMD_SET_CONF_IND; ccw->flags = 0; ccw->count = sizeof(indicators2(vcdev)); - ccw->cda = (__u32)(unsigned long) indicatorp; + ccw->cda = (__u32)virt_to_phys(indicatorp); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_SET_CONF_IND); if (ret) goto out; @@ -759,7 +761,7 @@ static u64 virtio_ccw_get_features(struct virtio_device *vdev) ccw->cmd_code = CCW_CMD_READ_FEAT; ccw->flags = 0; ccw->count = sizeof(*features); - ccw->cda = (__u32)(unsigned long)features; + ccw->cda = (__u32)virt_to_phys(features); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_FEAT); if (ret) { rc = 0; @@ -776,7 +778,7 @@ static u64 virtio_ccw_get_features(struct virtio_device *vdev) ccw->cmd_code = CCW_CMD_READ_FEAT; ccw->flags = 0; ccw->count = sizeof(*features); - ccw->cda = (__u32)(unsigned long)features; + ccw->cda = (__u32)virt_to_phys(features); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_FEAT); if (ret == 0) rc |= (u64)le32_to_cpu(features->features) << 32; @@ -829,7 +831,7 @@ static int virtio_ccw_finalize_features(struct virtio_device *vdev) ccw->cmd_code = CCW_CMD_WRITE_FEAT; ccw->flags = 0; ccw->count = sizeof(*features); - ccw->cda = (__u32)(unsigned long)features; + ccw->cda = (__u32)virt_to_phys(features); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_FEAT); if (ret) goto out_free; @@ -843,7 +845,7 @@ static int virtio_ccw_finalize_features(struct virtio_device *vdev) ccw->cmd_code = CCW_CMD_WRITE_FEAT; ccw->flags = 0; ccw->count = sizeof(*features); - ccw->cda = (__u32)(unsigned long)features; + ccw->cda = (__u32)virt_to_phys(features); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_FEAT); out_free: @@ -875,7 +877,7 @@ static void virtio_ccw_get_config(struct virtio_device *vdev, ccw->cmd_code = CCW_CMD_READ_CONF; ccw->flags = 0; ccw->count = offset + len; - ccw->cda = (__u32)(unsigned long)config_area; + ccw->cda = (__u32)virt_to_phys(config_area); ret = ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_CONFIG); if (ret) goto out_free; @@ -922,7 +924,7 @@ static void virtio_ccw_set_config(struct virtio_device *vdev, ccw->cmd_code = CCW_CMD_WRITE_CONF; ccw->flags = 0; ccw->count = offset + len; - ccw->cda = (__u32)(unsigned long)config_area; + ccw->cda = 
(__u32)virt_to_phys(config_area); ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_WRITE_CONFIG); out_free: @@ -946,7 +948,7 @@ static u8 virtio_ccw_get_status(struct virtio_device *vdev) ccw->cmd_code = CCW_CMD_READ_STATUS; ccw->flags = 0; ccw->count = sizeof(vcdev->dma_area->status); - ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status; + ccw->cda = (__u32)virt_to_phys(&vcdev->dma_area->status); ccw_io_helper(vcdev, ccw, VIRTIO_CCW_DOING_READ_STATUS); /* * If the channel program failed (should only happen if the device @@ -975,7 +977,7 @@ static void virtio_ccw_set_status(struct virtio_device *vdev, u8 status) ccw->cmd_code = CCW_CMD_WRITE_STATUS; ccw->flags = 0; ccw->count = sizeof(status); - ccw->cda = (__u32)(unsigned long)&vcdev->dma_area->status; + ccw->cda = (__u32)virt_to_phys(&vcdev->dma_area->status); /* We use ssch for setting the status which is a serializing * instruction that guarantees the memory writes have * completed before ssch. @@ -1274,7 +1276,7 @@ static int virtio_ccw_set_transport_rev(struct virtio_ccw_device *vcdev) ccw->cmd_code = CCW_CMD_SET_VIRTIO_REV; ccw->flags = 0; ccw->count = sizeof(*rev); - ccw->cda = (__u32)(unsigned long)rev; + ccw->cda = (__u32)virt_to_phys(rev); vcdev->revision = VIRTIO_CCW_REV_MAX; do {