From 880df2d46a3f23f30f954f6e64c576d7f411cc46 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 11 Jul 2023 17:59:13 +0200 Subject: [PATCH 001/135] powerpc/kuap: Avoid unnecessary reads of MD_AP A disassembly of interrupt_exit_kernel_prepare() shows a useless read of MD_AP register. This is shown by r9 being re-used immediately without doing anything with the value read. c000e0e0: 60 00 00 00 nop c000e0e4: ===> 7d 3a c2 a6 mfmd_ap r9 <==== c000e0e8: 7d 20 00 a6 mfmsr r9 c000e0ec: 7c 51 13 a6 mtspr 81,r2 c000e0f0: 81 3f 00 84 lwz r9,132(r31) c000e0f4: 71 29 80 00 andi. r9,r9,32768 kuap_get_and_assert_locked() is paired with kuap_kernel_restore() and are only used in interrupt_exit_kernel_prepare(). The value returned by kuap_get_and_assert_locked() is only used by kuap_kernel_restore(). On 8xx, kuap_kernel_restore() doesn't use the value read by kuap_get_and_assert_locked() so modify kuap_get_and_assert_locked() to not perform the read of MD_AP and return 0 instead. The same applies on BOOKE. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/bcbc84c2dd90bb1021da792b1968cdc22112dad8.1689091022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 8 ++------ arch/powerpc/include/asm/nohash/kup-booke.h | 6 ++---- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index c44d97751723c..8579210f2a6a3 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -41,14 +41,10 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua static inline unsigned long __kuap_get_and_assert_locked(void) { - unsigned long kuap; - - kuap = mfspr(SPRN_MD_AP); - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) - WARN_ON_ONCE(kuap >> 16 != MD_APG_KUAP >> 16); + WARN_ON_ONCE(mfspr(SPRN_MD_AP) >> 16 != MD_APG_KUAP >> 16); - return kuap; + return 0; } static inline void __allow_user_access(void __user *to, const void __user *from, diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h index 49bb41ed0816e..823c5a3a96d80 100644 --- a/arch/powerpc/include/asm/nohash/kup-booke.h +++ b/arch/powerpc/include/asm/nohash/kup-booke.h @@ -58,12 +58,10 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua static inline unsigned long __kuap_get_and_assert_locked(void) { - unsigned long kuap = mfspr(SPRN_PID); - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) - WARN_ON_ONCE(kuap); + WARN_ON_ONCE(mfspr(SPRN_PID)); - return kuap; + return 0; } static inline void __allow_user_access(void __user *to, const void __user *from, From 1bec4adcd59e923df6b7f5d492a9e4b8dfd22039 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 11 Jul 2023 17:59:14 +0200 Subject: [PATCH 002/135] powerpc/kuap: Avoid useless jump_label on empty function Disassembly of interrupt_enter_prepare() shows a pointless 
nop before the mftb c000abf0 : c000abf0: 81 23 00 84 lwz r9,132(r3) c000abf4: 71 29 40 00 andi. r9,r9,16384 c000abf8: 41 82 00 28 beq- c000ac20 c000abfc: ===> 60 00 00 00 nop <==== c000ac00: 7d 0c 42 e6 mftb r8 c000ac04: 80 e2 00 08 lwz r7,8(r2) c000ac08: 81 22 00 28 lwz r9,40(r2) c000ac0c: 91 02 00 24 stw r8,36(r2) c000ac10: 7d 29 38 50 subf r9,r9,r7 c000ac14: 7d 29 42 14 add r9,r9,r8 c000ac18: 91 22 00 08 stw r9,8(r2) c000ac1c: 4e 80 00 20 blr c000ac20: 60 00 00 00 nop c000ac24: 7d 5a c2 a6 mfmd_ap r10 c000ac28: 3d 20 de 00 lis r9,-8704 c000ac2c: 91 43 00 b0 stw r10,176(r3) c000ac30: 7d 3a c3 a6 mtspr 794,r9 c000ac34: 4e 80 00 20 blr That comes from the call to kuap_lock(), although __kuap_lock() is an empty function on the 8xx. To avoid that, only perform kuap_is_disabled() check when there is something to do with __kuap_lock(). Do the same with __kuap_save_and_lock() and __kuap_get_and_assert_locked(). Signed-off-by: Christophe Leroy Reviewed-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/a854d25bea375d4ba6ca9c2617f9edbba397100a.1689091022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 6 ++-- arch/powerpc/include/asm/book3s/64/kup.h | 10 ++---- arch/powerpc/include/asm/kup.h | 33 +++++++++----------- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 11 +++---- arch/powerpc/include/asm/nohash/kup-booke.h | 8 +++-- 5 files changed, 29 insertions(+), 39 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 678f9c9d89b6a..466a19cfb4dfd 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -77,10 +77,6 @@ static inline void kuap_unlock(unsigned long addr, bool ool) kuap_unlock_all_ool(); } -static inline void __kuap_lock(void) -{ -} - static inline void __kuap_save_and_lock(struct pt_regs *regs) { unsigned long kuap = current->thread.kuap; @@ -92,6 +88,7 @@ static inline void 
__kuap_save_and_lock(struct pt_regs *regs) current->thread.kuap = KUAP_NONE; kuap_lock_addr(kuap, false); } +#define __kuap_save_and_lock __kuap_save_and_lock static inline void kuap_user_restore(struct pt_regs *regs) { @@ -120,6 +117,7 @@ static inline unsigned long __kuap_get_and_assert_locked(void) return kuap; } +#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked static __always_inline void __allow_user_access(void __user *to, const void __user *from, u32 size, unsigned long dir) diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 84c09e546115a..2a7bd3ecc5565 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -298,15 +298,9 @@ static inline unsigned long __kuap_get_and_assert_locked(void) WARN_ON_ONCE(amr != AMR_KUAP_BLOCKED); return amr; } +#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked -/* Do nothing, book3s/64 does that in ASM */ -static inline void __kuap_lock(void) -{ -} - -static inline void __kuap_save_and_lock(struct pt_regs *regs) -{ -} +/* __kuap_lock() not required, book3s/64 does that in ASM */ /* * We support individually allowing read or write, but we don't support nesting diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index d751ddd081108..24cde16c4fbed 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -52,16 +52,9 @@ __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) return false; } -static inline void __kuap_lock(void) { } -static inline void __kuap_save_and_lock(struct pt_regs *regs) { } static inline void kuap_user_restore(struct pt_regs *regs) { } static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } -static inline unsigned long __kuap_get_and_assert_locked(void) -{ - return 0; -} - /* * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush * the L1D cache after user 
accesses. Only include the empty stubs for other @@ -85,29 +78,24 @@ bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) return __bad_kuap_fault(regs, address, is_write); } -static __always_inline void kuap_assert_locked(void) -{ - if (kuap_is_disabled()) - return; - - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) - __kuap_get_and_assert_locked(); -} - static __always_inline void kuap_lock(void) { +#ifdef __kuap_lock if (kuap_is_disabled()) return; __kuap_lock(); +#endif } static __always_inline void kuap_save_and_lock(struct pt_regs *regs) { +#ifdef __kuap_save_and_lock if (kuap_is_disabled()) return; __kuap_save_and_lock(regs); +#endif } static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) @@ -120,10 +108,17 @@ static __always_inline void kuap_kernel_restore(struct pt_regs *regs, unsigned l static __always_inline unsigned long kuap_get_and_assert_locked(void) { - if (kuap_is_disabled()) - return 0; +#ifdef __kuap_get_and_assert_locked + if (!kuap_is_disabled()) + return __kuap_get_and_assert_locked(); +#endif + return 0; +} - return __kuap_get_and_assert_locked(); +static __always_inline void kuap_assert_locked(void) +{ + if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) + kuap_get_and_assert_locked(); } #ifndef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index 8579210f2a6a3..a372cd822887b 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -20,15 +20,12 @@ static __always_inline bool kuap_is_disabled(void) return static_branch_unlikely(&disable_kuap_key); } -static inline void __kuap_lock(void) -{ -} - static inline void __kuap_save_and_lock(struct pt_regs *regs) { regs->kuap = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); } +#define __kuap_save_and_lock __kuap_save_and_lock static inline void kuap_user_restore(struct pt_regs *regs) { @@ -39,13 +36,15 @@ static inline void 
__kuap_kernel_restore(struct pt_regs *regs, unsigned long kua mtspr(SPRN_MD_AP, regs->kuap); } +#ifdef CONFIG_PPC_KUAP_DEBUG static inline unsigned long __kuap_get_and_assert_locked(void) { - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) - WARN_ON_ONCE(mfspr(SPRN_MD_AP) >> 16 != MD_APG_KUAP >> 16); + WARN_ON_ONCE(mfspr(SPRN_MD_AP) >> 16 != MD_APG_KUAP >> 16); return 0; } +#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked +#endif static inline void __allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h index 823c5a3a96d80..71182cbe20c3e 100644 --- a/arch/powerpc/include/asm/nohash/kup-booke.h +++ b/arch/powerpc/include/asm/nohash/kup-booke.h @@ -30,6 +30,7 @@ static inline void __kuap_lock(void) mtspr(SPRN_PID, 0); isync(); } +#define __kuap_lock __kuap_lock static inline void __kuap_save_and_lock(struct pt_regs *regs) { @@ -37,6 +38,7 @@ static inline void __kuap_save_and_lock(struct pt_regs *regs) mtspr(SPRN_PID, 0); isync(); } +#define __kuap_save_and_lock __kuap_save_and_lock static inline void kuap_user_restore(struct pt_regs *regs) { @@ -56,13 +58,15 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua /* Context synchronisation is performed by rfi */ } +#ifdef CONFIG_PPC_KUAP_DEBUG static inline unsigned long __kuap_get_and_assert_locked(void) { - if (IS_ENABLED(CONFIG_PPC_KUAP_DEBUG)) - WARN_ON_ONCE(mfspr(SPRN_PID)); + WARN_ON_ONCE(mfspr(SPRN_PID)); return 0; } +#define __kuap_get_and_assert_locked __kuap_get_and_assert_locked +#endif static inline void __allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) From 38bb171b958480b484e8e980be76c7d3656881ea Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 11 Jul 2023 17:59:15 +0200 Subject: [PATCH 003/135] powerpc/kuap: Fold kuep_is_disabled() into its only user 
kuep_is_disabled() was introduced by commit 91bb30822a2e ("powerpc/32s: Refactor update of user segment registers") but then all users but one were removed by commit 526d4a4c77ae ("powerpc/32s: Do kuep_lock() and kuep_unlock() in assembly"). Fold kuep_is_disabled() into init_new_context() which is its only user. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/b2247147c0a8c830ac82966451647850df4a64da.1689091022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 5 ----- arch/powerpc/mm/book3s32/mmu_context.c | 2 +- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 466a19cfb4dfd..0da0dea76c473 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -13,11 +13,6 @@ extern struct static_key_false disable_kuap_key; -static __always_inline bool kuep_is_disabled(void) -{ - return !IS_ENABLED(CONFIG_PPC_KUEP); -} - #ifdef CONFIG_PPC_KUAP #include diff --git a/arch/powerpc/mm/book3s32/mmu_context.c b/arch/powerpc/mm/book3s32/mmu_context.c index 269a3eb25a733..1922f9a6b0585 100644 --- a/arch/powerpc/mm/book3s32/mmu_context.c +++ b/arch/powerpc/mm/book3s32/mmu_context.c @@ -71,7 +71,7 @@ int init_new_context(struct task_struct *t, struct mm_struct *mm) mm->context.id = __init_new_context(); mm->context.sr0 = CTX_TO_VSID(mm->context.id, 0); - if (!kuep_is_disabled()) + if (IS_ENABLED(CONFIG_PPC_KUEP)) mm->context.sr0 |= SR_NX; if (!kuap_is_disabled()) mm->context.sr0 |= SR_KS; From 6b289911c80d45fd8da3d24ea14706361381b78d Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 11 Jul 2023 17:59:16 +0200 Subject: [PATCH 004/135] powerpc/features: Add capability to update mmu features later On powerpc32, features fixup is performed very early and that's too early to read the cmdline and take into account 'nosmap' parameter. 
On the other hand, no userspace access is performed that early and KUAP feature fixup can be performed later. Add a function to update mmu features. The function is passed a mask with the features that can be updated. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/31b27ee2c9d338f4f82cd8cd69d6bff979495290.1689091022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/feature-fixups.h | 1 + arch/powerpc/lib/feature-fixups.c | 31 ++++++++++++++++++++--- 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index ac605fc369c42..77824bd289a38 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -292,6 +292,7 @@ extern long __start___barrier_nospec_fixup, __stop___barrier_nospec_fixup; extern long __start__btb_flush_fixup, __stop__btb_flush_fixup; void apply_feature_fixups(void); +void update_mmu_feature_fixups(unsigned long mask); void setup_feature_keys(void); #endif diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 80def1c2afcb6..4f82581ca2034 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -67,7 +67,8 @@ static int patch_alt_instruction(u32 *src, u32 *dest, u32 *alt_start, u32 *alt_e return 0; } -static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) +static int patch_feature_section_mask(unsigned long value, unsigned long mask, + struct fixup_entry *fcur) { u32 *start, *end, *alt_start, *alt_end, *src, *dest; @@ -79,7 +80,7 @@ static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) if ((alt_end - alt_start) > (end - start)) return 1; - if ((value & fcur->mask) == fcur->value) + if ((value & fcur->mask & mask) == (fcur->value & mask)) return 0; src = alt_start; @@ -97,7 +98,8 @@ static int patch_feature_section(unsigned long value, struct 
fixup_entry *fcur) return 0; } -void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) +static void do_feature_fixups_mask(unsigned long value, unsigned long mask, + void *fixup_start, void *fixup_end) { struct fixup_entry *fcur, *fend; @@ -105,7 +107,7 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) fend = fixup_end; for (; fcur < fend; fcur++) { - if (patch_feature_section(value, fcur)) { + if (patch_feature_section_mask(value, mask, fcur)) { WARN_ON(1); printk("Unable to patch feature section at %p - %p" \ " with %p - %p\n", @@ -117,6 +119,11 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } } +void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) +{ + do_feature_fixups_mask(value, ~0, fixup_start, fixup_end); +} + #ifdef CONFIG_PPC_BARRIER_NOSPEC static bool is_fixup_addr_valid(void *dest, size_t size) { @@ -651,6 +658,17 @@ void __init apply_feature_fixups(void) do_final_fixups(); } +void __init update_mmu_feature_fixups(unsigned long mask) +{ + saved_mmu_features &= ~mask; + saved_mmu_features |= cur_cpu_spec->mmu_features & mask; + + do_feature_fixups_mask(cur_cpu_spec->mmu_features, mask, + PTRRELOC(&__start___mmu_ftr_fixup), + PTRRELOC(&__stop___mmu_ftr_fixup)); + mmu_feature_keys_init(); +} + void __init setup_feature_keys(void) { /* @@ -683,6 +701,11 @@ late_initcall(check_features); #define check(x) \ if (!(x)) printk("feature-fixups: test failed at line %d\n", __LINE__); +static int patch_feature_section(unsigned long value, struct fixup_entry *fcur) +{ + return patch_feature_section_mask(value, ~0, fcur); +} + /* This must be after the text it fixes up, vmlinux.lds.S enforces that atm */ static struct fixup_entry fixup; From 4589a2b7894d4266380b65e13291f609cf19dd19 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 11 Jul 2023 17:59:17 +0200 Subject: [PATCH 005/135] powerpc/kuap: MMU_FTR_BOOK3S_KUAP becomes MMU_FTR_KUAP In 
order to reuse MMU_FTR_BOOK3S_KUAP for other targets than BOOK3S, rename it MMU_FTR_KUAP. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/c8b6f7b8cd0eeaace96879ed0e0a157faa619451.1689091022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/hash-pkey.h | 2 +- arch/powerpc/include/asm/book3s/64/kup.h | 18 +++++++++--------- arch/powerpc/include/asm/mmu.h | 4 ++-- arch/powerpc/kernel/syscall.c | 2 +- arch/powerpc/mm/book3s64/pkeys.c | 2 +- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h index f1e60d579f6c1..6c5564c4fae43 100644 --- a/arch/powerpc/include/asm/book3s/64/hash-pkey.h +++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h @@ -24,7 +24,7 @@ static inline u64 pte_to_hpte_pkey_bits(u64 pteflags, unsigned long flags) ((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) | ((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL)); - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP) || + if (mmu_has_feature(MMU_FTR_KUAP) || mmu_has_feature(MMU_FTR_BOOK3S_KUEP)) { if ((pte_pkey == 0) && (flags & HPTE_USE_KERNEL_KEY)) return HASH_DEFAULT_KERNEL_KEY; diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 2a7bd3ecc5565..72fc4263ed267 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -31,7 +31,7 @@ mfspr \gpr2, SPRN_AMR cmpd \gpr1, \gpr2 beq 99f - END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_BOOK3S_KUAP, 68) + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_KUAP, 68) isync mtspr SPRN_AMR, \gpr1 @@ -78,7 +78,7 @@ * No need to restore IAMR when returning to kernel space. 
*/ 100: - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67) #endif .endm @@ -91,7 +91,7 @@ LOAD_REG_IMMEDIATE(\gpr2, AMR_KUAP_BLOCKED) 999: tdne \gpr1, \gpr2 EMIT_WARN_ENTRY 999b, __FILE__, __LINE__, (BUGFLAG_WARNING | BUGFLAG_ONCE) - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 67) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 67) #endif .endm #endif @@ -130,7 +130,7 @@ */ BEGIN_MMU_FTR_SECTION_NESTED(68) b 100f // skip_save_amr - END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_BOOK3S_KUAP, 68) + END_MMU_FTR_SECTION_NESTED_IFCLR(MMU_FTR_PKEY | MMU_FTR_KUAP, 68) /* * if pkey is disabled and we are entering from userspace @@ -166,7 +166,7 @@ mtspr SPRN_AMR, \gpr2 isync 102: - END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_BOOK3S_KUAP, 69) + END_MMU_FTR_SECTION_NESTED_IFSET(MMU_FTR_KUAP, 69) /* * if entering from kernel we don't need save IAMR @@ -232,7 +232,7 @@ static inline u64 current_thread_iamr(void) static __always_inline bool kuap_is_disabled(void) { - return !mmu_has_feature(MMU_FTR_BOOK3S_KUAP); + return !mmu_has_feature(MMU_FTR_KUAP); } static inline void kuap_user_restore(struct pt_regs *regs) @@ -243,7 +243,7 @@ static inline void kuap_user_restore(struct pt_regs *regs) if (!mmu_has_feature(MMU_FTR_PKEY)) return; - if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + if (!mmu_has_feature(MMU_FTR_KUAP)) { amr = mfspr(SPRN_AMR); if (amr != regs->amr) restore_amr = true; @@ -317,7 +317,7 @@ static inline unsigned long get_kuap(void) * This has no effect in terms of actually blocking things on hash, * so it doesn't break anything. 
*/ - if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + if (!mmu_has_feature(MMU_FTR_KUAP)) return AMR_KUAP_BLOCKED; return mfspr(SPRN_AMR); @@ -325,7 +325,7 @@ static inline unsigned long get_kuap(void) static __always_inline void set_kuap(unsigned long value) { - if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) + if (!mmu_has_feature(MMU_FTR_KUAP)) return; /* diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 94b981152667c..82af2e2c5eca7 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -33,7 +33,7 @@ * key 0 controlling userspace addresses on radix * Key 3 on hash */ -#define MMU_FTR_BOOK3S_KUAP ASM_CONST(0x00000200) +#define MMU_FTR_KUAP ASM_CONST(0x00000200) /* * Supports KUEP feature @@ -188,7 +188,7 @@ enum { #endif /* CONFIG_PPC_RADIX_MMU */ #endif #ifdef CONFIG_PPC_KUAP - MMU_FTR_BOOK3S_KUAP | + MMU_FTR_KUAP | #endif /* CONFIG_PPC_KUAP */ #ifdef CONFIG_PPC_MEM_KEYS MMU_FTR_PKEY | diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c index 18b9d325395f5..77fedb190c936 100644 --- a/arch/powerpc/kernel/syscall.c +++ b/arch/powerpc/kernel/syscall.c @@ -46,7 +46,7 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) iamr = mfspr(SPRN_IAMR); regs->amr = amr; regs->iamr = iamr; - if (mmu_has_feature(MMU_FTR_BOOK3S_KUAP)) { + if (mmu_has_feature(MMU_FTR_KUAP)) { mtspr(SPRN_AMR, AMR_KUAP_BLOCKED); flush_needed = true; } diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c index 1d2675ab6711d..1257339620338 100644 --- a/arch/powerpc/mm/book3s64/pkeys.c +++ b/arch/powerpc/mm/book3s64/pkeys.c @@ -291,7 +291,7 @@ void setup_kuap(bool disabled) if (smp_processor_id() == boot_cpuid) { pr_info("Activating Kernel Userspace Access Prevention\n"); - cur_cpu_spec->mmu_features |= MMU_FTR_BOOK3S_KUAP; + cur_cpu_spec->mmu_features |= MMU_FTR_KUAP; } /* From 26e041208291bfdea1cb9e26bc94a0f9499efe15 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: 
Tue, 11 Jul 2023 17:59:18 +0200 Subject: [PATCH 006/135] powerpc/kuap: Use MMU_FTR_KUAP on all and refactor disabling kuap All but book3s/64 use a static branch key for disabling kuap. book3s/64 uses an mmu feature. Refactor all targets to use MMU_FTR_KUAP like book3s/64. For PPC32 that implies updating mmu features fixups once KUAP has been initialised. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/6b3d7c977bad73378ea368bc6818e9c94ea95ab0.1689091022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 9 --------- arch/powerpc/include/asm/book3s/64/kup.h | 5 ----- arch/powerpc/include/asm/kup.h | 11 +++++++++++ arch/powerpc/include/asm/nohash/32/kup-8xx.h | 9 --------- arch/powerpc/include/asm/nohash/kup-booke.h | 8 -------- arch/powerpc/kernel/cputable.c | 4 ++++ arch/powerpc/mm/book3s32/kuap.c | 5 +---- arch/powerpc/mm/init_32.c | 2 ++ arch/powerpc/mm/nohash/kup.c | 6 +----- 9 files changed, 19 insertions(+), 40 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 0da0dea76c473..4ca6122ef0e13 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -9,10 +9,6 @@ #ifndef __ASSEMBLY__ -#include - -extern struct static_key_false disable_kuap_key; - #ifdef CONFIG_PPC_KUAP #include @@ -20,11 +16,6 @@ extern struct static_key_false disable_kuap_key; #define KUAP_NONE (~0UL) #define KUAP_ALL (~1UL) -static __always_inline bool kuap_is_disabled(void) -{ - return static_branch_unlikely(&disable_kuap_key); -} - static inline void kuap_lock_one(unsigned long addr) { mtsr(mfsr(addr) | SR_KS, addr); diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index 72fc4263ed267..a014f4d9a2aa5 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -230,11 +230,6 @@ static inline u64 current_thread_iamr(void) #ifdef 
CONFIG_PPC_KUAP -static __always_inline bool kuap_is_disabled(void) -{ - return !mmu_has_feature(MMU_FTR_KUAP); -} - static inline void kuap_user_restore(struct pt_regs *regs) { bool restore_amr = false, restore_iamr = false; diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 24cde16c4fbed..bab161b609c16 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -6,6 +6,12 @@ #define KUAP_WRITE 2 #define KUAP_READ_WRITE (KUAP_READ | KUAP_WRITE) +#ifndef __ASSEMBLY__ +#include + +static __always_inline bool kuap_is_disabled(void); +#endif + #ifdef CONFIG_PPC_BOOK3S_64 #include #endif @@ -41,6 +47,11 @@ void setup_kuep(bool disabled); #ifdef CONFIG_PPC_KUAP void setup_kuap(bool disabled); + +static __always_inline bool kuap_is_disabled(void) +{ + return !mmu_has_feature(MMU_FTR_KUAP); +} #else static inline void setup_kuap(bool disabled) { } diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index a372cd822887b..d0601859c45a0 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -9,17 +9,8 @@ #ifndef __ASSEMBLY__ -#include - #include -extern struct static_key_false disable_kuap_key; - -static __always_inline bool kuap_is_disabled(void) -{ - return static_branch_unlikely(&disable_kuap_key); -} - static inline void __kuap_save_and_lock(struct pt_regs *regs) { regs->kuap = mfspr(SPRN_MD_AP); diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h index 71182cbe20c3e..8e4734c8fef19 100644 --- a/arch/powerpc/include/asm/nohash/kup-booke.h +++ b/arch/powerpc/include/asm/nohash/kup-booke.h @@ -13,18 +13,10 @@ #else -#include #include #include -extern struct static_key_false disable_kuap_key; - -static __always_inline bool kuap_is_disabled(void) -{ - return static_branch_unlikely(&disable_kuap_key); -} - static inline void __kuap_lock(void) { mtspr(SPRN_PID, 0); 
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 8a32bffefa5b7..e97a0fd0ae905 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -75,6 +75,10 @@ static struct cpu_spec * __init setup_cpu_spec(unsigned long offset, t->cpu_features |= old.cpu_features & CPU_FTR_PMAO_BUG; } + /* Set kuap ON at startup, will be disabled later if cmdline has 'nosmap' */ + if (IS_ENABLED(CONFIG_PPC_KUAP) && IS_ENABLED(CONFIG_PPC32)) + t->mmu_features |= MMU_FTR_KUAP; + *PTRRELOC(&cur_cpu_spec) = &the_cpu_spec; /* diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c index 28676cabb005d..24c1c686e6b98 100644 --- a/arch/powerpc/mm/book3s32/kuap.c +++ b/arch/powerpc/mm/book3s32/kuap.c @@ -3,9 +3,6 @@ #include #include -struct static_key_false disable_kuap_key; -EXPORT_SYMBOL(disable_kuap_key); - void kuap_lock_all_ool(void) { kuap_lock_all(); @@ -30,7 +27,7 @@ void setup_kuap(bool disabled) return; if (disabled) - static_branch_enable(&disable_kuap_key); + cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP; else pr_info("Activating Kernel Userspace Access Protection\n"); } diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c index d4cc3749e6214..d8adc452f4310 100644 --- a/arch/powerpc/mm/init_32.c +++ b/arch/powerpc/mm/init_32.c @@ -126,6 +126,8 @@ void __init MMU_init(void) setup_kup(); + update_mmu_feature_fixups(MMU_FTR_KUAP); + /* Shortly after that, the entire linear mapping will be available */ memblock_set_current_limit(lowmem_end_addr); } diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c index 552becf90e97a..94ff82b9ae600 100644 --- a/arch/powerpc/mm/nohash/kup.c +++ b/arch/powerpc/mm/nohash/kup.c @@ -5,7 +5,6 @@ #include #include -#include #include #include @@ -13,16 +12,13 @@ #include #ifdef CONFIG_PPC_KUAP -struct static_key_false disable_kuap_key; -EXPORT_SYMBOL(disable_kuap_key); - void setup_kuap(bool disabled) { if (disabled) { if (IS_ENABLED(CONFIG_40x)) 
disable_kuep = true; if (smp_processor_id() == boot_cpuid) - static_branch_enable(&disable_kuap_key); + cur_cpu_spec->mmu_features &= ~MMU_FTR_KUAP; return; } From 5222a1d5142ec4f9ec063b274b80e20639584dbc Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 11 Jul 2023 17:59:19 +0200 Subject: [PATCH 007/135] powerpc/kuap: Simplify KUAP lock/unlock on BOOK3S/32 On book3s/32 KUAP is performed at segment level. At the moment, when enabling userspace access, only the current segment is modified. Then if a write is performed on another user segment, a fault is taken and all other user segments get enabled for userspace access. This then requires special attention when disabling userspace access. Having a userspace write access crossing a segment boundary is unlikely. Having a userspace write access crossing a segment boundary back and forth is even more unlikely. So, instead of enabling userspace access on all segments when a write fault occurs, just change which segment has userspace access enabled in order to eliminate the case when more than one segment has userspace access enabled. That simplifies userspace access deactivation. There is however a corner case which is even more unlikely but has to be handled anyway: an unaligned access which is crossing a segment boundary. That would definitely require at least having userspace access enabled on the two segments. To avoid complicating the likely case for such an unlikely event, handle such a situation like an alignment exception and emulate the store. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/8de8580513c1a6e880bad1ba9a69d3efad3d4fa5.1689091022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 65 +++++++----------------- arch/powerpc/include/asm/bug.h | 1 + arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/mm/book3s32/kuap.c | 15 +----- 4 files changed, 23 insertions(+), 60 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 4ca6122ef0e13..452d4efa84f5b 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -14,7 +14,6 @@ #include #define KUAP_NONE (~0UL) -#define KUAP_ALL (~1UL) static inline void kuap_lock_one(unsigned long addr) { @@ -28,41 +27,6 @@ static inline void kuap_unlock_one(unsigned long addr) isync(); /* Context sync required after mtsr() */ } -static inline void kuap_lock_all(void) -{ - update_user_segments(mfsr(0) | SR_KS); - isync(); /* Context sync required after mtsr() */ -} - -static inline void kuap_unlock_all(void) -{ - update_user_segments(mfsr(0) & ~SR_KS); - isync(); /* Context sync required after mtsr() */ -} - -void kuap_lock_all_ool(void); -void kuap_unlock_all_ool(void); - -static inline void kuap_lock_addr(unsigned long addr, bool ool) -{ - if (likely(addr != KUAP_ALL)) - kuap_lock_one(addr); - else if (!ool) - kuap_lock_all(); - else - kuap_lock_all_ool(); -} - -static inline void kuap_unlock(unsigned long addr, bool ool) -{ - if (likely(addr != KUAP_ALL)) - kuap_unlock_one(addr); - else if (!ool) - kuap_unlock_all(); - else - kuap_unlock_all_ool(); -} - static inline void __kuap_save_and_lock(struct pt_regs *regs) { unsigned long kuap = current->thread.kuap; @@ -72,7 +36,7 @@ static inline void __kuap_save_and_lock(struct pt_regs *regs) return; current->thread.kuap = KUAP_NONE; - kuap_lock_addr(kuap, false); + kuap_lock_one(kuap); } #define __kuap_save_and_lock __kuap_save_and_lock @@ -84,7 +48,7 
@@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua { if (unlikely(kuap != KUAP_NONE)) { current->thread.kuap = KUAP_NONE; - kuap_lock_addr(kuap, false); + kuap_lock_one(kuap); } if (likely(regs->kuap == KUAP_NONE)) @@ -92,7 +56,7 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua current->thread.kuap = regs->kuap; - kuap_unlock(regs->kuap, false); + kuap_unlock_one(regs->kuap); } static inline unsigned long __kuap_get_and_assert_locked(void) @@ -127,7 +91,7 @@ static __always_inline void __prevent_user_access(unsigned long dir) return; current->thread.kuap = KUAP_NONE; - kuap_lock_addr(kuap, true); + kuap_lock_one(kuap); } static inline unsigned long __prevent_user_access_return(void) @@ -136,7 +100,7 @@ static inline unsigned long __prevent_user_access_return(void) if (flags != KUAP_NONE) { current->thread.kuap = KUAP_NONE; - kuap_lock_addr(flags, true); + kuap_lock_one(flags); } return flags; @@ -146,7 +110,7 @@ static inline void __restore_user_access(unsigned long flags) { if (flags != KUAP_NONE) { current->thread.kuap = flags; - kuap_unlock(flags, true); + kuap_unlock_one(flags); } } @@ -155,14 +119,23 @@ __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { unsigned long kuap = regs->kuap; - if (!is_write || kuap == KUAP_ALL) + if (!is_write) return false; if (kuap == KUAP_NONE) return true; - /* If faulting address doesn't match unlocked segment, unlock all */ - if ((kuap ^ address) & 0xf0000000) - regs->kuap = KUAP_ALL; + /* + * If faulting address doesn't match unlocked segment, change segment. + * In case of unaligned store crossing two segments, emulate store. 
+ */ + if ((kuap ^ address) & 0xf0000000) { + if (!(kuap & 0x0fffffff) && address > kuap - 4 && fix_alignment(regs)) { + regs_add_return_ip(regs, 4); + emulate_single_step(regs); + } else { + regs->kuap = address; + } + } return false; } diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 00c6b0b4ede48..1db485aacbd9b 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -120,6 +120,7 @@ struct pt_regs; void hash__do_page_fault(struct pt_regs *); void bad_page_fault(struct pt_regs *, int); +void emulate_single_step(struct pt_regs *regs); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 7ef147e2a20d7..f5ce282dc4b8b 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1158,7 +1158,7 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) * pretend we got a single-step exception. This was pointed out * by Kumar Gala. 
-- paulus */ -static void emulate_single_step(struct pt_regs *regs) +void emulate_single_step(struct pt_regs *regs) { if (single_stepping(regs)) __single_step_exception(regs); diff --git a/arch/powerpc/mm/book3s32/kuap.c b/arch/powerpc/mm/book3s32/kuap.c index 24c1c686e6b98..3a8815555a489 100644 --- a/arch/powerpc/mm/book3s32/kuap.c +++ b/arch/powerpc/mm/book3s32/kuap.c @@ -3,22 +3,11 @@ #include #include -void kuap_lock_all_ool(void) -{ - kuap_lock_all(); -} -EXPORT_SYMBOL(kuap_lock_all_ool); - -void kuap_unlock_all_ool(void) -{ - kuap_unlock_all(); -} -EXPORT_SYMBOL(kuap_unlock_all_ool); - void setup_kuap(bool disabled) { if (!disabled) { - kuap_lock_all_ool(); + update_user_segments(mfsr(0) | SR_KS); + isync(); /* Context sync required after mtsr() */ init_mm.context.sr0 |= SR_KS; current->thread.sr0 |= SR_KS; } From eb52f66f0abd468caf8be4e690d7fdef96250c2f Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 11 Jul 2023 17:59:20 +0200 Subject: [PATCH 008/135] powerpc/kuap: KUAP enabling/disabling functions must be __always_inline Objtool reports the following warnings: arch/powerpc/kernel/signal_32.o: warning: objtool: __prevent_user_access.constprop.0+0x4 (.text+0x4): redundant UACCESS disable arch/powerpc/kernel/signal_32.o: warning: objtool: user_access_begin+0x2c (.text+0x4c): return with UACCESS enabled arch/powerpc/kernel/signal_32.o: warning: objtool: handle_rt_signal32+0x188 (.text+0x360): call to __prevent_user_access.constprop.0() with UACCESS enabled arch/powerpc/kernel/signal_32.o: warning: objtool: handle_signal32+0x150 (.text+0x4d4): call to __prevent_user_access.constprop.0() with UACCESS enabled This is due to some KUAP enabling/disabling functions being out of line although they are marked inline. Use __always_inline instead. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/ca5e50ddbec3867db5146ebddbc9a1dc0e443bc8.1689091022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 18 +++++++-------- arch/powerpc/include/asm/book3s/64/kup.h | 23 ++++++++++---------- arch/powerpc/include/asm/kup.h | 16 +++++++------- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 20 ++++++++--------- arch/powerpc/include/asm/nohash/kup-booke.h | 22 +++++++++---------- arch/powerpc/include/asm/uaccess.h | 6 ++--- 6 files changed, 53 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 452d4efa84f5b..931d200afe56f 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -15,19 +15,19 @@ #define KUAP_NONE (~0UL) -static inline void kuap_lock_one(unsigned long addr) +static __always_inline void kuap_lock_one(unsigned long addr) { mtsr(mfsr(addr) | SR_KS, addr); isync(); /* Context sync required after mtsr() */ } -static inline void kuap_unlock_one(unsigned long addr) +static __always_inline void kuap_unlock_one(unsigned long addr) { mtsr(mfsr(addr) & ~SR_KS, addr); isync(); /* Context sync required after mtsr() */ } -static inline void __kuap_save_and_lock(struct pt_regs *regs) +static __always_inline void __kuap_save_and_lock(struct pt_regs *regs) { unsigned long kuap = current->thread.kuap; @@ -40,11 +40,11 @@ static inline void __kuap_save_and_lock(struct pt_regs *regs) } #define __kuap_save_and_lock __kuap_save_and_lock -static inline void kuap_user_restore(struct pt_regs *regs) +static __always_inline void kuap_user_restore(struct pt_regs *regs) { } -static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { if (unlikely(kuap != KUAP_NONE)) { current->thread.kuap = KUAP_NONE; @@ -59,7 +59,7 @@ static 
inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua kuap_unlock_one(regs->kuap); } -static inline unsigned long __kuap_get_and_assert_locked(void) +static __always_inline unsigned long __kuap_get_and_assert_locked(void) { unsigned long kuap = current->thread.kuap; @@ -94,7 +94,7 @@ static __always_inline void __prevent_user_access(unsigned long dir) kuap_lock_one(kuap); } -static inline unsigned long __prevent_user_access_return(void) +static __always_inline unsigned long __prevent_user_access_return(void) { unsigned long flags = current->thread.kuap; @@ -106,7 +106,7 @@ static inline unsigned long __prevent_user_access_return(void) return flags; } -static inline void __restore_user_access(unsigned long flags) +static __always_inline void __restore_user_access(unsigned long flags) { if (flags != KUAP_NONE) { current->thread.kuap = flags; @@ -114,7 +114,7 @@ static inline void __restore_user_access(unsigned long flags) } } -static inline bool +static __always_inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { unsigned long kuap = regs->kuap; diff --git a/arch/powerpc/include/asm/book3s/64/kup.h b/arch/powerpc/include/asm/book3s/64/kup.h index a014f4d9a2aa5..497a7bd31ecc0 100644 --- a/arch/powerpc/include/asm/book3s/64/kup.h +++ b/arch/powerpc/include/asm/book3s/64/kup.h @@ -213,14 +213,14 @@ extern u64 __ro_after_init default_iamr; * access restrictions. Because of this ignore AMR value when accessing * userspace via kernel thread. 
*/ -static inline u64 current_thread_amr(void) +static __always_inline u64 current_thread_amr(void) { if (current->thread.regs) return current->thread.regs->amr; return default_amr; } -static inline u64 current_thread_iamr(void) +static __always_inline u64 current_thread_iamr(void) { if (current->thread.regs) return current->thread.regs->iamr; @@ -230,7 +230,7 @@ static inline u64 current_thread_iamr(void) #ifdef CONFIG_PPC_KUAP -static inline void kuap_user_restore(struct pt_regs *regs) +static __always_inline void kuap_user_restore(struct pt_regs *regs) { bool restore_amr = false, restore_iamr = false; unsigned long amr, iamr; @@ -269,7 +269,7 @@ static inline void kuap_user_restore(struct pt_regs *regs) */ } -static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) +static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { if (likely(regs->amr == amr)) return; @@ -285,7 +285,7 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr */ } -static inline unsigned long __kuap_get_and_assert_locked(void) +static __always_inline unsigned long __kuap_get_and_assert_locked(void) { unsigned long amr = mfspr(SPRN_AMR); @@ -302,7 +302,7 @@ static inline unsigned long __kuap_get_and_assert_locked(void) * because that would require an expensive read/modify write of the AMR. */ -static inline unsigned long get_kuap(void) +static __always_inline unsigned long get_kuap(void) { /* * We return AMR_KUAP_BLOCKED when we don't support KUAP because @@ -332,7 +332,8 @@ static __always_inline void set_kuap(unsigned long value) isync(); } -static inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) +static __always_inline bool +__bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { /* * For radix this will be a storage protection fault (DSISR_PROTFAULT). 
@@ -375,12 +376,12 @@ static __always_inline void allow_user_access(void __user *to, const void __user #else /* CONFIG_PPC_KUAP */ -static inline unsigned long get_kuap(void) +static __always_inline unsigned long get_kuap(void) { return AMR_KUAP_BLOCKED; } -static inline void set_kuap(unsigned long value) { } +static __always_inline void set_kuap(unsigned long value) { } static __always_inline void allow_user_access(void __user *to, const void __user *from, unsigned long size, unsigned long dir) @@ -395,7 +396,7 @@ static __always_inline void prevent_user_access(unsigned long dir) do_uaccess_flush(); } -static inline unsigned long prevent_user_access_return(void) +static __always_inline unsigned long prevent_user_access_return(void) { unsigned long flags = get_kuap(); @@ -406,7 +407,7 @@ static inline unsigned long prevent_user_access_return(void) return flags; } -static inline void restore_user_access(unsigned long flags) +static __always_inline void restore_user_access(unsigned long flags) { set_kuap(flags); if (static_branch_unlikely(&uaccess_flush_key) && flags == AMR_KUAP_BLOCKED) diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index bab161b609c16..77adb9cd2da57 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -57,14 +57,14 @@ static inline void setup_kuap(bool disabled) { } static __always_inline bool kuap_is_disabled(void) { return true; } -static inline bool +static __always_inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return false; } -static inline void kuap_user_restore(struct pt_regs *regs) { } -static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } +static __always_inline void kuap_user_restore(struct pt_regs *regs) { } +static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr) { } /* * book3s/64/kup-radix.h defines these functions for the !KUAP case to flush @@ -72,11 +72,11 @@ 
static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long amr * platforms. */ #ifndef CONFIG_PPC_BOOK3S_64 -static inline void __allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } -static inline void __prevent_user_access(unsigned long dir) { } -static inline unsigned long __prevent_user_access_return(void) { return 0UL; } -static inline void __restore_user_access(unsigned long flags) { } +static __always_inline void __allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { } +static __always_inline void __prevent_user_access(unsigned long dir) { } +static __always_inline unsigned long __prevent_user_access_return(void) { return 0UL; } +static __always_inline void __restore_user_access(unsigned long flags) { } #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index d0601859c45a0..e231b3afed98b 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -11,24 +11,24 @@ #include -static inline void __kuap_save_and_lock(struct pt_regs *regs) +static __always_inline void __kuap_save_and_lock(struct pt_regs *regs) { regs->kuap = mfspr(SPRN_MD_AP); mtspr(SPRN_MD_AP, MD_APG_KUAP); } #define __kuap_save_and_lock __kuap_save_and_lock -static inline void kuap_user_restore(struct pt_regs *regs) +static __always_inline void kuap_user_restore(struct pt_regs *regs) { } -static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { mtspr(SPRN_MD_AP, regs->kuap); } #ifdef CONFIG_PPC_KUAP_DEBUG -static inline unsigned long __kuap_get_and_assert_locked(void) +static __always_inline unsigned long __kuap_get_and_assert_locked(void) { WARN_ON_ONCE(mfspr(SPRN_MD_AP) >> 16 != MD_APG_KUAP 
>> 16); @@ -37,18 +37,18 @@ static inline unsigned long __kuap_get_and_assert_locked(void) #define __kuap_get_and_assert_locked __kuap_get_and_assert_locked #endif -static inline void __allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static __always_inline void __allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_INIT); } -static inline void __prevent_user_access(unsigned long dir) +static __always_inline void __prevent_user_access(unsigned long dir) { mtspr(SPRN_MD_AP, MD_APG_KUAP); } -static inline unsigned long __prevent_user_access_return(void) +static __always_inline unsigned long __prevent_user_access_return(void) { unsigned long flags; @@ -59,12 +59,12 @@ static inline unsigned long __prevent_user_access_return(void) return flags; } -static inline void __restore_user_access(unsigned long flags) +static __always_inline void __restore_user_access(unsigned long flags) { mtspr(SPRN_MD_AP, flags); } -static inline bool +static __always_inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return !((regs->kuap ^ MD_APG_KUAP) & 0xff000000); diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h index 8e4734c8fef19..98780a2d3dcdd 100644 --- a/arch/powerpc/include/asm/nohash/kup-booke.h +++ b/arch/powerpc/include/asm/nohash/kup-booke.h @@ -17,14 +17,14 @@ #include -static inline void __kuap_lock(void) +static __always_inline void __kuap_lock(void) { mtspr(SPRN_PID, 0); isync(); } #define __kuap_lock __kuap_lock -static inline void __kuap_save_and_lock(struct pt_regs *regs) +static __always_inline void __kuap_save_and_lock(struct pt_regs *regs) { regs->kuap = mfspr(SPRN_PID); mtspr(SPRN_PID, 0); @@ -32,7 +32,7 @@ static inline void __kuap_save_and_lock(struct pt_regs *regs) } #define __kuap_save_and_lock __kuap_save_and_lock -static inline void 
kuap_user_restore(struct pt_regs *regs) +static __always_inline void kuap_user_restore(struct pt_regs *regs) { if (kuap_is_disabled()) return; @@ -42,7 +42,7 @@ static inline void kuap_user_restore(struct pt_regs *regs) /* Context synchronisation is performed by rfi */ } -static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) +static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kuap) { if (regs->kuap) mtspr(SPRN_PID, current->thread.pid); @@ -51,7 +51,7 @@ static inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned long kua } #ifdef CONFIG_PPC_KUAP_DEBUG -static inline unsigned long __kuap_get_and_assert_locked(void) +static __always_inline unsigned long __kuap_get_and_assert_locked(void) { WARN_ON_ONCE(mfspr(SPRN_PID)); @@ -60,20 +60,20 @@ static inline unsigned long __kuap_get_and_assert_locked(void) #define __kuap_get_and_assert_locked __kuap_get_and_assert_locked #endif -static inline void __allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static __always_inline void __allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { mtspr(SPRN_PID, current->thread.pid); isync(); } -static inline void __prevent_user_access(unsigned long dir) +static __always_inline void __prevent_user_access(unsigned long dir) { mtspr(SPRN_PID, 0); isync(); } -static inline unsigned long __prevent_user_access_return(void) +static __always_inline unsigned long __prevent_user_access_return(void) { unsigned long flags = mfspr(SPRN_PID); @@ -83,7 +83,7 @@ static inline unsigned long __prevent_user_access_return(void) return flags; } -static inline void __restore_user_access(unsigned long flags) +static __always_inline void __restore_user_access(unsigned long flags) { if (flags) { mtspr(SPRN_PID, current->thread.pid); @@ -91,7 +91,7 @@ static inline void __restore_user_access(unsigned long flags) } } -static inline bool 
+static __always_inline bool __bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write) { return !regs->kuap; diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h index a2d255aa96276..fb725ec77926e 100644 --- a/arch/powerpc/include/asm/uaccess.h +++ b/arch/powerpc/include/asm/uaccess.h @@ -386,7 +386,7 @@ copy_mc_to_user(void __user *to, const void *from, unsigned long n) extern long __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size); -static __must_check inline bool user_access_begin(const void __user *ptr, size_t len) +static __must_check __always_inline bool user_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) return false; @@ -401,7 +401,7 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t #define user_access_save prevent_user_access_return #define user_access_restore restore_user_access -static __must_check inline bool +static __must_check __always_inline bool user_read_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) @@ -415,7 +415,7 @@ user_read_access_begin(const void __user *ptr, size_t len) #define user_read_access_begin user_read_access_begin #define user_read_access_end prevent_current_read_from_user -static __must_check inline bool +static __must_check __always_inline bool user_write_access_begin(const void __user *ptr, size_t len) { if (unlikely(!access_ok(ptr, len))) From 3a24ea0df83e32355d897a18bbd82e05986dcdc3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 11 Jul 2023 17:59:21 +0200 Subject: [PATCH 009/135] powerpc/kuap: Use ASM feature fixups instead of static branches To avoid a useless nop on top of every uaccess enable/disable and make life easier for objtool, replace static branches by ASM feature fixups that will nop KUAP enabling instructions out in the unlikely case KUAP is disabled at boottime. 
Leave it as is on book3s/64 for now, it will be handled later when objtool is activated on PPC64. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/671948788024fd890ec4ed175bc332dab8664ea5.1689091022.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/kup.h | 46 ++++++++++++++++---- arch/powerpc/include/asm/kup.h | 45 +++---------------- arch/powerpc/include/asm/nohash/32/kup-8xx.h | 30 +++++++++---- arch/powerpc/include/asm/nohash/kup-booke.h | 38 +++++++++------- arch/powerpc/mm/nohash/kup.c | 2 +- 5 files changed, 87 insertions(+), 74 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/kup.h b/arch/powerpc/include/asm/book3s/32/kup.h index 931d200afe56f..4e14a5427a632 100644 --- a/arch/powerpc/include/asm/book3s/32/kup.h +++ b/arch/powerpc/include/asm/book3s/32/kup.h @@ -27,6 +27,34 @@ static __always_inline void kuap_unlock_one(unsigned long addr) isync(); /* Context sync required after mtsr() */ } +static __always_inline void uaccess_begin_32s(unsigned long addr) +{ + unsigned long tmp; + + asm volatile(ASM_MMU_FTR_IFSET( + "mfsrin %0, %1;" + "rlwinm %0, %0, 0, %2;" + "mtsrin %0, %1;" + "isync", "", %3) + : "=&r"(tmp) + : "r"(addr), "i"(~SR_KS), "i"(MMU_FTR_KUAP) + : "memory"); +} + +static __always_inline void uaccess_end_32s(unsigned long addr) +{ + unsigned long tmp; + + asm volatile(ASM_MMU_FTR_IFSET( + "mfsrin %0, %1;" + "oris %0, %0, %2;" + "mtsrin %0, %1;" + "isync", "", %3) + : "=&r"(tmp) + : "r"(addr), "i"(SR_KS >> 16), "i"(MMU_FTR_KUAP) + : "memory"); +} + static __always_inline void __kuap_save_and_lock(struct pt_regs *regs) { unsigned long kuap = current->thread.kuap; @@ -69,8 +97,8 @@ static __always_inline unsigned long __kuap_get_and_assert_locked(void) } #define __kuap_get_and_assert_locked __kuap_get_and_assert_locked -static __always_inline void __allow_user_access(void __user *to, const void __user *from, - u32 size, unsigned long dir) +static __always_inline void 
allow_user_access(void __user *to, const void __user *from, + u32 size, unsigned long dir) { BUILD_BUG_ON(!__builtin_constant_p(dir)); @@ -78,10 +106,10 @@ static __always_inline void __allow_user_access(void __user *to, const void __us return; current->thread.kuap = (__force u32)to; - kuap_unlock_one((__force u32)to); + uaccess_begin_32s((__force u32)to); } -static __always_inline void __prevent_user_access(unsigned long dir) +static __always_inline void prevent_user_access(unsigned long dir) { u32 kuap = current->thread.kuap; @@ -91,26 +119,26 @@ static __always_inline void __prevent_user_access(unsigned long dir) return; current->thread.kuap = KUAP_NONE; - kuap_lock_one(kuap); + uaccess_end_32s(kuap); } -static __always_inline unsigned long __prevent_user_access_return(void) +static __always_inline unsigned long prevent_user_access_return(void) { unsigned long flags = current->thread.kuap; if (flags != KUAP_NONE) { current->thread.kuap = KUAP_NONE; - kuap_lock_one(flags); + uaccess_end_32s(flags); } return flags; } -static __always_inline void __restore_user_access(unsigned long flags) +static __always_inline void restore_user_access(unsigned long flags) { if (flags != KUAP_NONE) { current->thread.kuap = flags; - kuap_unlock_one(flags); + uaccess_begin_32s(flags); } } diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h index 77adb9cd2da57..ad7e8c5aec3f8 100644 --- a/arch/powerpc/include/asm/kup.h +++ b/arch/powerpc/include/asm/kup.h @@ -72,11 +72,11 @@ static __always_inline void __kuap_kernel_restore(struct pt_regs *regs, unsigned * platforms. 
*/ #ifndef CONFIG_PPC_BOOK3S_64 -static __always_inline void __allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) { } -static __always_inline void __prevent_user_access(unsigned long dir) { } -static __always_inline unsigned long __prevent_user_access_return(void) { return 0UL; } -static __always_inline void __restore_user_access(unsigned long flags) { } +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) { } +static __always_inline void prevent_user_access(unsigned long dir) { } +static __always_inline unsigned long prevent_user_access_return(void) { return 0UL; } +static __always_inline void restore_user_access(unsigned long flags) { } #endif /* CONFIG_PPC_BOOK3S_64 */ #endif /* CONFIG_PPC_KUAP */ @@ -132,41 +132,6 @@ static __always_inline void kuap_assert_locked(void) kuap_get_and_assert_locked(); } -#ifndef CONFIG_PPC_BOOK3S_64 -static __always_inline void allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) -{ - if (kuap_is_disabled()) - return; - - __allow_user_access(to, from, size, dir); -} - -static __always_inline void prevent_user_access(unsigned long dir) -{ - if (kuap_is_disabled()) - return; - - __prevent_user_access(dir); -} - -static __always_inline unsigned long prevent_user_access_return(void) -{ - if (kuap_is_disabled()) - return 0; - - return __prevent_user_access_return(); -} - -static __always_inline void restore_user_access(unsigned long flags) -{ - if (kuap_is_disabled()) - return; - - __restore_user_access(flags); -} -#endif /* CONFIG_PPC_BOOK3S_64 */ - static __always_inline void allow_read_from_user(const void __user *from, unsigned long size) { barrier_nospec(); diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h b/arch/powerpc/include/asm/nohash/32/kup-8xx.h index e231b3afed98b..46bc5925e5fdc 100644 --- a/arch/powerpc/include/asm/nohash/32/kup-8xx.h +++ 
b/arch/powerpc/include/asm/nohash/32/kup-8xx.h @@ -37,31 +37,43 @@ static __always_inline unsigned long __kuap_get_and_assert_locked(void) #define __kuap_get_and_assert_locked __kuap_get_and_assert_locked #endif -static __always_inline void __allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static __always_inline void uaccess_begin_8xx(unsigned long val) { - mtspr(SPRN_MD_AP, MD_APG_INIT); + asm(ASM_MMU_FTR_IFSET("mtspr %0, %1", "", %2) : : + "i"(SPRN_MD_AP), "r"(val), "i"(MMU_FTR_KUAP) : "memory"); } -static __always_inline void __prevent_user_access(unsigned long dir) +static __always_inline void uaccess_end_8xx(void) { - mtspr(SPRN_MD_AP, MD_APG_KUAP); + asm(ASM_MMU_FTR_IFSET("mtspr %0, %1", "", %2) : : + "i"(SPRN_MD_AP), "r"(MD_APG_KUAP), "i"(MMU_FTR_KUAP) : "memory"); +} + +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ + uaccess_begin_8xx(MD_APG_INIT); } -static __always_inline unsigned long __prevent_user_access_return(void) +static __always_inline void prevent_user_access(unsigned long dir) +{ + uaccess_end_8xx(); +} + +static __always_inline unsigned long prevent_user_access_return(void) { unsigned long flags; flags = mfspr(SPRN_MD_AP); - mtspr(SPRN_MD_AP, MD_APG_KUAP); + uaccess_end_8xx(); return flags; } -static __always_inline void __restore_user_access(unsigned long flags) +static __always_inline void restore_user_access(unsigned long flags) { - mtspr(SPRN_MD_AP, flags); + uaccess_begin_8xx(flags); } static __always_inline bool diff --git a/arch/powerpc/include/asm/nohash/kup-booke.h b/arch/powerpc/include/asm/nohash/kup-booke.h index 98780a2d3dcdd..0c7c3258134c5 100644 --- a/arch/powerpc/include/asm/nohash/kup-booke.h +++ b/arch/powerpc/include/asm/nohash/kup-booke.h @@ -3,6 +3,7 @@ #define _ASM_POWERPC_KUP_BOOKE_H_ #include +#include #ifdef CONFIG_PPC_KUAP @@ -60,35 +61,42 @@ static __always_inline unsigned long 
__kuap_get_and_assert_locked(void) #define __kuap_get_and_assert_locked __kuap_get_and_assert_locked #endif -static __always_inline void __allow_user_access(void __user *to, const void __user *from, - unsigned long size, unsigned long dir) +static __always_inline void uaccess_begin_booke(unsigned long val) { - mtspr(SPRN_PID, current->thread.pid); - isync(); + asm(ASM_MMU_FTR_IFSET("mtspr %0, %1; isync", "", %2) : : + "i"(SPRN_PID), "r"(val), "i"(MMU_FTR_KUAP) : "memory"); } -static __always_inline void __prevent_user_access(unsigned long dir) +static __always_inline void uaccess_end_booke(void) { - mtspr(SPRN_PID, 0); - isync(); + asm(ASM_MMU_FTR_IFSET("mtspr %0, %1; isync", "", %2) : : + "i"(SPRN_PID), "r"(0), "i"(MMU_FTR_KUAP) : "memory"); +} + +static __always_inline void allow_user_access(void __user *to, const void __user *from, + unsigned long size, unsigned long dir) +{ + uaccess_begin_booke(current->thread.pid); } -static __always_inline unsigned long __prevent_user_access_return(void) +static __always_inline void prevent_user_access(unsigned long dir) +{ + uaccess_end_booke(); +} + +static __always_inline unsigned long prevent_user_access_return(void) { unsigned long flags = mfspr(SPRN_PID); - mtspr(SPRN_PID, 0); - isync(); + uaccess_end_booke(); return flags; } -static __always_inline void __restore_user_access(unsigned long flags) +static __always_inline void restore_user_access(unsigned long flags) { - if (flags) { - mtspr(SPRN_PID, current->thread.pid); - isync(); - } + if (flags) + uaccess_begin_booke(current->thread.pid); } static __always_inline bool diff --git a/arch/powerpc/mm/nohash/kup.c b/arch/powerpc/mm/nohash/kup.c index 94ff82b9ae600..e1f7de2e54ec0 100644 --- a/arch/powerpc/mm/nohash/kup.c +++ b/arch/powerpc/mm/nohash/kup.c @@ -24,6 +24,6 @@ void setup_kuap(bool disabled) pr_info("Activating Kernel Userspace Access Protection\n"); - __prevent_user_access(KUAP_READ_WRITE); + prevent_user_access(KUAP_READ_WRITE); } #endif From 
9d6e1c21e1be4643628ee343e0b8d79828485ba2 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 Jul 2023 09:46:46 -0700 Subject: [PATCH 010/135] powerpc/crypto: fix missing skcipher dependency for aes-gcm-p10 My stripped down configuration fails to build with: ERROR: modpost: "skcipher_walk_aead_encrypt" [arch/powerpc/crypto/aes-gcm-p10-crypto.ko] undefined! ERROR: modpost: "skcipher_walk_done" [arch/powerpc/crypto/aes-gcm-p10-crypto.ko] undefined! ERROR: modpost: "skcipher_walk_aead_decrypt" [arch/powerpc/crypto/aes-gcm-p10-crypto.ko] undefined! Fix it by selecting CRYPTO_SKCIPHER. Signed-off-by: Omar Sandoval Signed-off-by: Michael Ellerman Link: https://msgid.link/c55ad70799e027a3d2756b85ccadc0af52ae8915.1689007370.git.osandov@osandov.com --- arch/powerpc/crypto/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index ad1872518992e..81ae015861c05 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -100,6 +100,7 @@ config CRYPTO_AES_GCM_P10 select CRYPTO_LIB_AES select CRYPTO_ALGAPI select CRYPTO_AEAD + select CRYPTO_SKCIPHER default m help AEAD cipher: AES cipher algorithms (FIPS-197) From 026fa6c52da5fc559d896a62cb6f8e208c22738d Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 10 Jul 2023 09:46:47 -0700 Subject: [PATCH 011/135] powerpc/crypto: don't build aes-gcm-p10 by default None of the other accelerated crypto modules are built by default. 
Signed-off-by: Omar Sandoval Signed-off-by: Michael Ellerman Link: https://msgid.link/40d9c7ebe82c9a9d4ace542ac433753d2f22c6a0.1689007370.git.osandov@osandov.com --- arch/powerpc/crypto/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 81ae015861c05..97802c72317ca 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -101,7 +101,6 @@ config CRYPTO_AES_GCM_P10 select CRYPTO_ALGAPI select CRYPTO_AEAD select CRYPTO_SKCIPHER - default m help AEAD cipher: AES cipher algorithms (FIPS-197) GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D) From ab481817912ec5c882a6a42ce12c57aed3cfd506 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 17 Jul 2023 21:52:23 +1000 Subject: [PATCH 012/135] powerpc/64: Enable accelerated crypto algorithms in defconfig Enable all the accelerated crypto algorithms as modules in the 64-bit defconfig, to get more test coverage. Signed-off-by: Michael Ellerman Link: https://msgid.link/20230717115223.286158-1-mpe@ellerman.id.au --- arch/powerpc/configs/ppc64_defconfig | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index 268fa361a06d6..40a1f4a4274c5 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -390,8 +390,11 @@ CONFIG_CRYPTO_SHA256=y CONFIG_CRYPTO_WP512=m CONFIG_CRYPTO_LZO=m CONFIG_CRYPTO_CRC32C_VPMSUM=m +CONFIG_CRYPTO_CRCT10DIF_VPMSUM=m +CONFIG_CRYPTO_VPMSUM_TESTER=m CONFIG_CRYPTO_MD5_PPC=m CONFIG_CRYPTO_SHA1_PPC=m +CONFIG_CRYPTO_AES_GCM_P10=m CONFIG_CRYPTO_DEV_NX=y CONFIG_CRYPTO_DEV_NX_ENCRYPT=m CONFIG_CRYPTO_DEV_VMX=y From c3c2e93753484bb4e935ed8205c1f569907f5970 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 24 May 2023 16:08:18 +1000 Subject: [PATCH 013/135] powerpc: Account mm_cpumask and active_cpus in init_mm init_mm mm_cpumask and context.active_cpus are not maintained at boot
and hotplug. This seems to be harmless because init_mm does not have a userspace and so never gets user TLBs flushed, but it looks odd and it prevents some sanity checks being added. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20230524060821.148015-2-npiggin@gmail.com --- arch/powerpc/kernel/setup-common.c | 6 +++++- arch/powerpc/kernel/smp.c | 12 ++++++++++++ arch/powerpc/mm/mmu_context.c | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index d2a446216444f..16843294d978c 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -969,8 +969,12 @@ void __init setup_arch(char **cmdline_p) klp_init_thread_info(&init_task); setup_initial_init_mm(_stext, _etext, _edata, _end); - + /* sched_init() does the mmgrab(&init_mm) for the primary CPU */ + VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm))); + cpumask_set_cpu(smp_processor_id(), mm_cpumask(&init_mm)); + inc_mm_active_cpus(&init_mm); mm_iommu_init(&init_mm); + irqstack_early_init(); exc_lvl_early_init(); emergency_stack_init(); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index fbbb695bae3d2..8cb6d1c902669 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include @@ -1616,6 +1617,9 @@ void start_secondary(void *unused) mmgrab_lazy_tlb(&init_mm); current->active_mm = &init_mm; + VM_WARN_ON(cpumask_test_cpu(smp_processor_id(), mm_cpumask(&init_mm))); + cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); + inc_mm_active_cpus(&init_mm); smp_store_cpu_info(cpu); set_dec(tb_ticks_per_jiffy); @@ -1751,6 +1755,14 @@ int __cpu_disable(void) void __cpu_die(unsigned int cpu) { + /* + * This could perhaps be a generic call in idlea_task_dead(), but + * that requires testing from all archs, so first put it here to + */ + 
VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(&init_mm))); + dec_mm_active_cpus(&init_mm); + cpumask_clear_cpu(cpu, mm_cpumask(&init_mm)); + if (smp_ops->cpu_die) smp_ops->cpu_die(cpu); } diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 1fb9c99f86797..894468975a441 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -47,6 +47,7 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, /* Mark this context has been used on the new CPU */ if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { + VM_WARN_ON_ONCE(next == &init_mm); cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); inc_mm_active_cpus(next); From f74b2a6c01a0b319070ccee7dea0cc4dad694041 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 24 May 2023 16:08:19 +1000 Subject: [PATCH 014/135] powerpc/64s: Use dec_mm_active_cpus helper Avoid open-coded atomic_dec on mm->context.active_cpus and use the function made for it. Add CONFIG_DEBUG_VM underflow checking on the counter. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20230524060821.148015-3-npiggin@gmail.com --- arch/powerpc/include/asm/book3s/64/mmu.h | 2 +- arch/powerpc/include/asm/mmu_context.h | 1 + arch/powerpc/mm/book3s64/radix_tlb.c | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 570a4960cf179..5cf0e9c953b32 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -261,7 +261,7 @@ static inline void radix_init_pseries(void) { } #define arch_clear_mm_cpumask_cpu(cpu, mm) \ do { \ if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { \ - atomic_dec(&(mm)->context.active_cpus); \ + dec_mm_active_cpus(mm); \ cpumask_clear_cpu(cpu, mm_cpumask(mm)); \ } \ } while (0) diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 57f5017111f47..37bffa0f79183 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -127,6 +127,7 @@ static inline void inc_mm_active_cpus(struct mm_struct *mm) static inline void dec_mm_active_cpus(struct mm_struct *mm) { + VM_WARN_ON_ONCE(atomic_read(&mm->context.active_cpus) <= 0); atomic_dec(&mm->context.active_cpus); } diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 0bd4866d98241..4e72d8007f885 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -820,7 +820,7 @@ void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) * that's what the caller expects. 
*/ if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { - atomic_dec(&mm->context.active_cpus); + dec_mm_active_cpus(mm); cpumask_clear_cpu(cpu, mm_cpumask(mm)); always_flush = true; } From 177255afb40548fdf504384b361d18d6cbe35d1e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 24 May 2023 16:08:20 +1000 Subject: [PATCH 015/135] powerpc: Add mm_cpumask warning when context switching When context switching away from an mm, add a CONFIG_DEBUG_VM warning check to ensure this CPU is still set in the mask. This could catch bugs where the mask is improperly trimmed while the CPU is still using the mm. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20230524060821.148015-4-npiggin@gmail.com --- arch/powerpc/mm/mmu_context.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c index 894468975a441..b24c19078eb18 100644 --- a/arch/powerpc/mm/mmu_context.c +++ b/arch/powerpc/mm/mmu_context.c @@ -43,12 +43,13 @@ static inline void switch_mm_pgdir(struct task_struct *tsk, void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + int cpu = smp_processor_id(); bool new_on_cpu = false; /* Mark this context has been used on the new CPU */ - if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { + if (!cpumask_test_cpu(cpu, mm_cpumask(next))) { VM_WARN_ON_ONCE(next == &init_mm); - cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + cpumask_set_cpu(cpu, mm_cpumask(next)); inc_mm_active_cpus(next); /* @@ -101,6 +102,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, * sub architectures. 
Out of line for now */ switch_mmu_context(prev, next, tsk); + + VM_WARN_ON_ONCE(!cpumask_test_cpu(cpu, mm_cpumask(prev))); } #ifndef CONFIG_PPC_BOOK3S_64 From e43c0a0c3c2870e1ee29519dc249471adf19ab5f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 24 May 2023 16:08:21 +1000 Subject: [PATCH 016/135] powerpc/64s/radix: combine final TLB flush and lazy tlb mm shootdown IPIs This performs lazy tlb mm shootdown when doing the exit TLB flush when all mm users go away and user mappings are removed, which avoids having to do the lazy tlb mm shootdown IPIs on the final mmput when all kernel references disappear. powerpc/64s uses a broadcast TLBIE for the exit TLB flush if remote CPUs need to be invalidated (unless TLBIE is disabled), so this doesn't necessarily save IPIs but it does avoid a broadcast TLBIE which is quite expensive. Signed-off-by: Nicholas Piggin [mpe: Squash in preempt_disable/enable() fix from Nick] Signed-off-by: Michael Ellerman Link: https://msgid.link/20230524060821.148015-5-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 30 +++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 4e72d8007f885..dd127087fee8a 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -1313,7 +1313,35 @@ void radix__tlb_flush(struct mmu_gather *tlb) * See the comment for radix in arch_exit_mmap(). */ if (tlb->fullmm) { - __flush_all_mm(mm, true); + if (IS_ENABLED(CONFIG_MMU_LAZY_TLB_SHOOTDOWN)) { + /* + * Shootdown based lazy tlb mm refcounting means we + * have to IPI everyone in the mm_cpumask anyway soon + * when the mm goes away, so might as well do it as + * part of the final flush now. + * + * If lazy shootdown was improved to reduce IPIs (e.g., + * by batching), then it may end up being better to use + * tlbies here instead. 
+ */ + preempt_disable(); + + smp_mb(); /* see radix__flush_tlb_mm */ + exit_flush_lazy_tlbs(mm); + _tlbiel_pid(mm->context.id, RIC_FLUSH_ALL); + + /* + * It should not be possible to have coprocessors still + * attached here. + */ + if (WARN_ON_ONCE(atomic_read(&mm->context.copros) > 0)) + __flush_all_mm(mm, true); + + preempt_enable(); + } else { + __flush_all_mm(mm, true); + } + } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) { if (!tlb->freed_tables) radix__flush_tlb_mm(mm); From 81d7cac4d11cc65f29be68c72759429d5194347a Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 24 Jul 2023 15:02:42 -0600 Subject: [PATCH 017/135] powerpc: Explicitly include correct DT includes The DT of_device.h and of_platform.h date back to the separate of_platform_bus_type before it was merged into the regular platform bus. As part of that merge prepping Arm DT support 13 years ago, they "temporarily" include each other. They also include platform_device.h and of.h. As a result, there's a pretty much random mix of those include files used throughout the tree. In order to detangle these headers and replace the implicit includes with struct declarations, users need to explicitly include the correct includes.
Signed-off-by: Rob Herring [mpe: Fixup maple/setup.c which needs platform_device] Signed-off-by: Michael Ellerman Link: https://msgid.link/20230724210247.778034-1-robh@kernel.org --- arch/powerpc/include/asm/ibmebus.h | 2 ++ arch/powerpc/include/asm/macio.h | 3 ++- arch/powerpc/kernel/legacy_serial.c | 2 +- arch/powerpc/kernel/of_platform.c | 4 +--- arch/powerpc/kernel/setup-common.c | 4 ++-- arch/powerpc/kexec/file_load_64.c | 2 +- arch/powerpc/kexec/ranges.c | 2 +- arch/powerpc/platforms/4xx/cpm.c | 2 +- arch/powerpc/platforms/4xx/hsta_msi.c | 2 +- arch/powerpc/platforms/4xx/soc.c | 2 +- arch/powerpc/platforms/512x/mpc5121_ads.c | 2 +- arch/powerpc/platforms/512x/mpc512x_generic.c | 2 +- arch/powerpc/platforms/512x/mpc512x_lpbfifo.c | 2 +- arch/powerpc/platforms/512x/pdm360ng.c | 3 ++- arch/powerpc/platforms/52xx/mpc52xx_gpt.c | 3 +-- arch/powerpc/platforms/82xx/ep8248e.c | 1 + arch/powerpc/platforms/83xx/km83xx.c | 4 ++-- arch/powerpc/platforms/83xx/mpc832x_rdb.c | 4 +++- arch/powerpc/platforms/83xx/suspend.c | 2 +- arch/powerpc/platforms/85xx/bsc913x_qds.c | 2 +- arch/powerpc/platforms/85xx/bsc913x_rdb.c | 2 +- arch/powerpc/platforms/85xx/c293pcie.c | 3 +-- arch/powerpc/platforms/85xx/common.c | 1 + arch/powerpc/platforms/85xx/ge_imp3a.c | 2 +- arch/powerpc/platforms/85xx/ksi8560.c | 3 ++- arch/powerpc/platforms/85xx/mpc8536_ds.c | 2 +- arch/powerpc/platforms/85xx/mpc85xx_ds.c | 2 +- arch/powerpc/platforms/85xx/mpc85xx_mds.c | 4 ++-- arch/powerpc/platforms/85xx/mpc85xx_rdb.c | 3 ++- arch/powerpc/platforms/85xx/p1010rdb.c | 2 +- arch/powerpc/platforms/85xx/p1022_ds.c | 2 +- arch/powerpc/platforms/85xx/p1022_rdk.c | 2 +- arch/powerpc/platforms/85xx/p1023_rdb.c | 3 +-- arch/powerpc/platforms/85xx/socrates.c | 2 +- arch/powerpc/platforms/85xx/socrates_fpga_pic.c | 1 - arch/powerpc/platforms/85xx/stx_gp3.c | 2 +- arch/powerpc/platforms/85xx/tqm85xx.c | 2 +- arch/powerpc/platforms/85xx/twr_p102x.c | 3 ++- arch/powerpc/platforms/85xx/xes_mpc85xx.c | 2 +- 
arch/powerpc/platforms/86xx/common.c | 3 +++ arch/powerpc/platforms/86xx/gef_ppc9a.c | 2 +- arch/powerpc/platforms/86xx/gef_sbc310.c | 2 +- arch/powerpc/platforms/86xx/gef_sbc610.c | 2 +- arch/powerpc/platforms/86xx/mvme7100.c | 1 - arch/powerpc/platforms/86xx/pic.c | 2 +- arch/powerpc/platforms/cell/axon_msi.c | 3 ++- arch/powerpc/platforms/cell/cbe_regs.c | 3 +-- arch/powerpc/platforms/cell/iommu.c | 2 +- arch/powerpc/platforms/cell/setup.c | 1 + arch/powerpc/platforms/cell/spider-pci.c | 1 - arch/powerpc/platforms/embedded6xx/holly.c | 2 +- arch/powerpc/platforms/maple/setup.c | 3 ++- arch/powerpc/platforms/pasemi/gpio_mdio.c | 2 +- arch/powerpc/platforms/pasemi/setup.c | 2 ++ arch/powerpc/platforms/powermac/setup.c | 2 +- arch/powerpc/platforms/powernv/opal-imc.c | 1 - arch/powerpc/platforms/powernv/opal-rtc.c | 3 ++- arch/powerpc/platforms/powernv/opal-secvar.c | 2 +- arch/powerpc/platforms/powernv/opal-sensor.c | 2 ++ arch/powerpc/platforms/pseries/ibmebus.c | 1 + arch/powerpc/sysdev/cpm_common.c | 2 -- arch/powerpc/sysdev/cpm_gpio.c | 3 ++- arch/powerpc/sysdev/fsl_pmc.c | 4 ++-- arch/powerpc/sysdev/fsl_rio.c | 4 ++-- arch/powerpc/sysdev/fsl_rmu.c | 1 - arch/powerpc/sysdev/fsl_soc.c | 1 - arch/powerpc/sysdev/mpic_msgr.c | 3 ++- arch/powerpc/sysdev/mpic_timer.c | 1 - arch/powerpc/sysdev/of_rtc.c | 4 ++-- arch/powerpc/sysdev/pmi.c | 4 ++-- 70 files changed, 86 insertions(+), 76 deletions(-) diff --git a/arch/powerpc/include/asm/ibmebus.h b/arch/powerpc/include/asm/ibmebus.h index 088f95b2e14f9..6f33253a364ac 100644 --- a/arch/powerpc/include/asm/ibmebus.h +++ b/arch/powerpc/include/asm/ibmebus.h @@ -46,6 +46,8 @@ #include #include +struct platform_driver; + extern struct bus_type ibmebus_bus_type; int ibmebus_register_driver(struct platform_driver *drv); diff --git a/arch/powerpc/include/asm/macio.h b/arch/powerpc/include/asm/macio.h index ff5fd82d9ff00..3a07c62973aab 100644 --- a/arch/powerpc/include/asm/macio.h +++ b/arch/powerpc/include/asm/macio.h @@ -3,7 
+3,8 @@ #define __MACIO_ASIC_H__ #ifdef __KERNEL__ -#include +#include +#include extern struct bus_type macio_bus_type; diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 6ee65741dbd52..1da2f6e7d2a13 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -5,8 +5,8 @@ #include #include #include +#include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c index f89376ff633e7..adc76fa58d1ed 100644 --- a/arch/powerpc/kernel/of_platform.c +++ b/arch/powerpc/kernel/of_platform.c @@ -13,9 +13,7 @@ #include #include #include -#include -#include -#include +#include #include #include diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 16843294d978c..2f1026fba00d5 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -31,9 +31,9 @@ #include #include #include -#include +#include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 110d28bede2a7..7e69be55451a7 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c index 5fc53a5fcfdf6..fb3e12f152144 100644 --- a/arch/powerpc/kexec/ranges.c +++ b/arch/powerpc/kexec/ranges.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/4xx/cpm.c b/arch/powerpc/platforms/4xx/cpm.c index 182e12855c279..670f8ad4465bc 100644 --- a/arch/powerpc/platforms/4xx/cpm.c +++ b/arch/powerpc/platforms/4xx/cpm.c @@ -18,7 +18,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/4xx/hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c 
index e11b57a62b054..c6bd846b0d65f 100644 --- a/arch/powerpc/platforms/4xx/hsta_msi.c +++ b/arch/powerpc/platforms/4xx/hsta_msi.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c index ac1cd8b178799..f91df08278777 100644 --- a/arch/powerpc/platforms/4xx/soc.c +++ b/arch/powerpc/platforms/4xx/soc.c @@ -15,8 +15,8 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/powerpc/platforms/512x/mpc5121_ads.c b/arch/powerpc/platforms/512x/mpc5121_ads.c index 80b25ce076bc5..a18f85b3ef363 100644 --- a/arch/powerpc/platforms/512x/mpc5121_ads.c +++ b/arch/powerpc/platforms/512x/mpc5121_ads.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/512x/mpc512x_generic.c b/arch/powerpc/platforms/512x/mpc512x_generic.c index 97dfaac8f7ffe..0d58ab257cd93 100644 --- a/arch/powerpc/platforms/512x/mpc512x_generic.c +++ b/arch/powerpc/platforms/512x/mpc512x_generic.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c index c1e981649bd95..4a25b6b486158 100644 --- a/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c +++ b/arch/powerpc/platforms/512x/mpc512x_lpbfifo.c @@ -10,9 +10,9 @@ #include #include #include -#include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/512x/pdm360ng.c b/arch/powerpc/platforms/512x/pdm360ng.c index 4bdec1c25de75..ce51cfeeb0665 100644 --- a/arch/powerpc/platforms/512x/pdm360ng.c +++ b/arch/powerpc/platforms/512x/pdm360ng.c @@ -7,11 +7,12 @@ * PDM360NG board setup */ +#include #include #include +#include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c index 3fce4e1c3af68..581059527c36a 
100644 --- a/arch/powerpc/platforms/52xx/mpc52xx_gpt.c +++ b/arch/powerpc/platforms/52xx/mpc52xx_gpt.c @@ -48,7 +48,6 @@ * the output mode. This driver does not change the output mode setting. */ -#include #include #include #include @@ -57,8 +56,8 @@ #include #include #include -#include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 8f1856ba692e9..16808536f7888 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/83xx/km83xx.c b/arch/powerpc/platforms/83xx/km83xx.c index 26ddc71365473..2b5d187d9b62d 100644 --- a/arch/powerpc/platforms/83xx/km83xx.c +++ b/arch/powerpc/platforms/83xx/km83xx.c @@ -20,8 +20,8 @@ #include #include #include -#include -#include +#include +#include #include #include diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c index 3b4e4173c59ed..d523ce0f48dbb 100644 --- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c +++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c @@ -15,8 +15,10 @@ #include #include #include +#include +#include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 3fa8979ac8a62..9833c36bda838 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/bsc913x_qds.c b/arch/powerpc/platforms/85xx/bsc913x_qds.c index a029aa0905385..2eb62bff86d48 100644 --- a/arch/powerpc/platforms/85xx/bsc913x_qds.c +++ b/arch/powerpc/platforms/85xx/bsc913x_qds.c @@ -9,7 +9,7 @@ * Copyright 2014 Freescale Semiconductor Inc. 
*/ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/bsc913x_rdb.c b/arch/powerpc/platforms/85xx/bsc913x_rdb.c index 361b4371d073b..161f006cb3bb5 100644 --- a/arch/powerpc/platforms/85xx/bsc913x_rdb.c +++ b/arch/powerpc/platforms/85xx/bsc913x_rdb.c @@ -7,7 +7,7 @@ * Copyright 2011-2012 Freescale Semiconductor Inc. */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/c293pcie.c b/arch/powerpc/platforms/85xx/c293pcie.c index 34975708be79f..7a63a3ad5e8a4 100644 --- a/arch/powerpc/platforms/85xx/c293pcie.c +++ b/arch/powerpc/platforms/85xx/c293pcie.c @@ -7,8 +7,7 @@ #include #include -#include -#include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/common.c b/arch/powerpc/platforms/85xx/common.c index a554b6d87cf76..757811155587d 100644 --- a/arch/powerpc/platforms/85xx/common.c +++ b/arch/powerpc/platforms/85xx/common.c @@ -3,6 +3,7 @@ * Routines common to most mpc85xx-based boards. */ +#include #include #include diff --git a/arch/powerpc/platforms/85xx/ge_imp3a.c b/arch/powerpc/platforms/85xx/ge_imp3a.c index 3678a1fbf5ad5..9c3b44a1952ec 100644 --- a/arch/powerpc/platforms/85xx/ge_imp3a.c +++ b/arch/powerpc/platforms/85xx/ge_imp3a.c @@ -17,8 +17,8 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/powerpc/platforms/85xx/ksi8560.c b/arch/powerpc/platforms/85xx/ksi8560.c index af38c3aec0426..1b6326a4b0f24 100644 --- a/arch/powerpc/platforms/85xx/ksi8560.c +++ b/arch/powerpc/platforms/85xx/ksi8560.c @@ -18,7 +18,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/mpc8536_ds.c b/arch/powerpc/platforms/85xx/mpc8536_ds.c index 58ab3831913fe..e966b2ad8ecd4 100644 --- a/arch/powerpc/platforms/85xx/mpc8536_ds.c +++ b/arch/powerpc/platforms/85xx/mpc8536_ds.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include diff --git 
a/arch/powerpc/platforms/85xx/mpc85xx_ds.c b/arch/powerpc/platforms/85xx/mpc85xx_ds.c index 4347d629b5671..2856148321b3c 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_ds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_ds.c @@ -15,8 +15,8 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/powerpc/platforms/85xx/mpc85xx_mds.c b/arch/powerpc/platforms/85xx/mpc85xx_mds.c index 0546f19416c29..c19490cf6376e 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_mds.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_mds.c @@ -26,8 +26,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c index c42a68da6dfdd..ec9f60fbebc71 100644 --- a/arch/powerpc/platforms/85xx/mpc85xx_rdb.c +++ b/arch/powerpc/platforms/85xx/mpc85xx_rdb.c @@ -12,7 +12,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/p1010rdb.c b/arch/powerpc/platforms/85xx/p1010rdb.c index 14ec79a327466..10d6f1fa33275 100644 --- a/arch/powerpc/platforms/85xx/p1010rdb.c +++ b/arch/powerpc/platforms/85xx/p1010rdb.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/p1022_ds.c b/arch/powerpc/platforms/85xx/p1022_ds.c index 23d0926298b96..0dd786a061a6a 100644 --- a/arch/powerpc/platforms/85xx/p1022_ds.c +++ b/arch/powerpc/platforms/85xx/p1022_ds.c @@ -18,8 +18,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/p1022_rdk.c b/arch/powerpc/platforms/85xx/p1022_rdk.c index d1159150c3b5f..25ab6e9c14703 100644 --- a/arch/powerpc/platforms/85xx/p1022_rdk.c +++ b/arch/powerpc/platforms/85xx/p1022_rdk.c @@ -14,8 +14,8 @@ #include #include +#include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/p1023_rdb.c 
b/arch/powerpc/platforms/85xx/p1023_rdb.c index 9df0439a9382c..e4fa8731fd2d3 100644 --- a/arch/powerpc/platforms/85xx/p1023_rdb.c +++ b/arch/powerpc/platforms/85xx/p1023_rdb.c @@ -15,9 +15,8 @@ #include #include #include +#include #include -#include -#include #include #include diff --git a/arch/powerpc/platforms/85xx/socrates.c b/arch/powerpc/platforms/85xx/socrates.c index 9fa1338bc0026..403367b318db2 100644 --- a/arch/powerpc/platforms/85xx/socrates.c +++ b/arch/powerpc/platforms/85xx/socrates.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c index 3768c86b96296..baa12eff6d5de 100644 --- a/arch/powerpc/platforms/85xx/socrates_fpga_pic.c +++ b/arch/powerpc/platforms/85xx/socrates_fpga_pic.c @@ -6,7 +6,6 @@ #include #include #include -#include #include /* diff --git a/arch/powerpc/platforms/85xx/stx_gp3.c b/arch/powerpc/platforms/85xx/stx_gp3.c index 5e2646b4c039f..c10efc45894c2 100644 --- a/arch/powerpc/platforms/85xx/stx_gp3.c +++ b/arch/powerpc/platforms/85xx/stx_gp3.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/tqm85xx.c b/arch/powerpc/platforms/85xx/tqm85xx.c index 80effb028bf49..6be1b9809db6b 100644 --- a/arch/powerpc/platforms/85xx/tqm85xx.c +++ b/arch/powerpc/platforms/85xx/tqm85xx.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/twr_p102x.c b/arch/powerpc/platforms/85xx/twr_p102x.c index b88e23a334a4f..c0a0456f16749 100644 --- a/arch/powerpc/platforms/85xx/twr_p102x.c +++ b/arch/powerpc/platforms/85xx/twr_p102x.c @@ -13,7 +13,8 @@ #include #include #include -#include +#include +#include #include #include diff --git a/arch/powerpc/platforms/85xx/xes_mpc85xx.c b/arch/powerpc/platforms/85xx/xes_mpc85xx.c index 184013e6601e9..45f257fc1ade0 100644 --- 
a/arch/powerpc/platforms/85xx/xes_mpc85xx.c +++ b/arch/powerpc/platforms/85xx/xes_mpc85xx.c @@ -16,8 +16,8 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/powerpc/platforms/86xx/common.c b/arch/powerpc/platforms/86xx/common.c index 0069d38263e70..a4a5505276091 100644 --- a/arch/powerpc/platforms/86xx/common.c +++ b/arch/powerpc/platforms/86xx/common.c @@ -3,7 +3,10 @@ * Routines common to most mpc86xx-based boards. */ +#include +#include #include +#include #include #include "mpc86xx.h" diff --git a/arch/powerpc/platforms/86xx/gef_ppc9a.c b/arch/powerpc/platforms/86xx/gef_ppc9a.c index f0512e51300cc..f7f98cca7b91e 100644 --- a/arch/powerpc/platforms/86xx/gef_ppc9a.c +++ b/arch/powerpc/platforms/86xx/gef_ppc9a.c @@ -18,8 +18,8 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/powerpc/platforms/86xx/gef_sbc310.c b/arch/powerpc/platforms/86xx/gef_sbc310.c index 1430b524d982e..689835f7f088b 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc310.c +++ b/arch/powerpc/platforms/86xx/gef_sbc310.c @@ -18,8 +18,8 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/powerpc/platforms/86xx/gef_sbc610.c b/arch/powerpc/platforms/86xx/gef_sbc610.c index c92af0d964e16..365f511186ca3 100644 --- a/arch/powerpc/platforms/86xx/gef_sbc610.c +++ b/arch/powerpc/platforms/86xx/gef_sbc610.c @@ -18,8 +18,8 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/powerpc/platforms/86xx/mvme7100.c b/arch/powerpc/platforms/86xx/mvme7100.c index c0ac405143614..cee49ecd32d22 100644 --- a/arch/powerpc/platforms/86xx/mvme7100.c +++ b/arch/powerpc/platforms/86xx/mvme7100.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c index 2c32c3488afbb..5c7bd925653d4 100644 --- a/arch/powerpc/platforms/86xx/pic.c +++ 
b/arch/powerpc/platforms/86xx/pic.c @@ -6,8 +6,8 @@ #include #include #include +#include #include -#include #include #include diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c index 106000449d3b2..28dc86744cac2 100644 --- a/arch/powerpc/platforms/cell/axon_msi.c +++ b/arch/powerpc/platforms/cell/axon_msi.c @@ -10,10 +10,11 @@ #include #include #include -#include #include #include +#include #include +#include #include #include diff --git a/arch/powerpc/platforms/cell/cbe_regs.c b/arch/powerpc/platforms/cell/cbe_regs.c index fb4023f9ea6be..99b3558753e9d 100644 --- a/arch/powerpc/platforms/cell/cbe_regs.c +++ b/arch/powerpc/platforms/cell/cbe_regs.c @@ -10,9 +10,8 @@ #include #include #include +#include #include -#include -#include #include #include diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 8c71330395669..1202a69b0a208 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/cell/setup.c b/arch/powerpc/platforms/cell/setup.c index 9e07d101bceeb..f64a1ef98aa88 100644 --- a/arch/powerpc/platforms/cell/setup.c +++ b/arch/powerpc/platforms/cell/setup.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/cell/spider-pci.c b/arch/powerpc/platforms/cell/spider-pci.c index e36ebd84f55b8..68439445b1c37 100644 --- a/arch/powerpc/platforms/cell/spider-pci.c +++ b/arch/powerpc/platforms/cell/spider-pci.c @@ -9,7 +9,6 @@ #include #include -#include #include #include diff --git a/arch/powerpc/platforms/embedded6xx/holly.c b/arch/powerpc/platforms/embedded6xx/holly.c index 02ff260ae1ee6..ce9e58ee9754a 100644 --- a/arch/powerpc/platforms/embedded6xx/holly.c +++ b/arch/powerpc/platforms/embedded6xx/holly.c @@ -22,9 +22,9 @@ #include #include #include +#include #include #include 
-#include #include #include diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c index a4a79d77eca2a..f329a03edf4a6 100644 --- a/arch/powerpc/platforms/maple/setup.c +++ b/arch/powerpc/platforms/maple/setup.c @@ -36,8 +36,9 @@ #include #include #include +#include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c index 913b77b92cea1..fd130fe7a65ac 100644 --- a/arch/powerpc/platforms/pasemi/gpio_mdio.c +++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #define DELAY 1 diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 5c5b4a034f9ea..ef985ba2bf21f 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -16,7 +16,9 @@ #include #include #include +#include #include +#include #include #include diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c index 0c41f4b005bcf..6de1cd5d8a583 100644 --- a/arch/powerpc/platforms/powermac/setup.c +++ b/arch/powerpc/platforms/powermac/setup.c @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 348a8cdaecd6d..828fc4d884718 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/opal-rtc.c b/arch/powerpc/platforms/powernv/opal-rtc.c index a9bcf9217e649..79011a263aa6b 100644 --- a/arch/powerpc/platforms/powernv/opal-rtc.c +++ b/arch/powerpc/platforms/powernv/opal-rtc.c @@ -11,8 +11,9 @@ #include #include #include -#include +#include #include +#include #include #include diff --git 
a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c index a8436bf35e2fc..6ac410f4d3c7e 100644 --- a/arch/powerpc/platforms/powernv/opal-secvar.c +++ b/arch/powerpc/platforms/powernv/opal-secvar.c @@ -12,8 +12,8 @@ #define pr_fmt(fmt) "secvar: "fmt #include +#include #include -#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c index 3192c614a1e17..8880a1c145733 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -6,7 +6,9 @@ */ #include +#include #include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 44703f13985bf..183aa8de48e74 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/sysdev/cpm_common.c b/arch/powerpc/sysdev/cpm_common.c index 8234013a8772a..47db732981a8b 100644 --- a/arch/powerpc/sysdev/cpm_common.c +++ b/arch/powerpc/sysdev/cpm_common.c @@ -15,11 +15,9 @@ */ #include -#include #include #include #include -#include #include #include diff --git a/arch/powerpc/sysdev/cpm_gpio.c b/arch/powerpc/sysdev/cpm_gpio.c index 0695d26bd301d..40f57111e93e9 100644 --- a/arch/powerpc/sysdev/cpm_gpio.c +++ b/arch/powerpc/sysdev/cpm_gpio.c @@ -9,7 +9,8 @@ */ #include -#include +#include +#include #include #ifdef CONFIG_8xx_GPIO diff --git a/arch/powerpc/sysdev/fsl_pmc.c b/arch/powerpc/sysdev/fsl_pmc.c index 76896de970ca9..9f6dd11c13443 100644 --- a/arch/powerpc/sysdev/fsl_pmc.c +++ b/arch/powerpc/sysdev/fsl_pmc.c @@ -13,9 +13,9 @@ #include #include #include -#include +#include #include -#include +#include struct pmc_regs { __be32 devdisr; diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index 0331962bc6d2d..efd8f6291ea62 100644 
--- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -23,10 +23,10 @@ #include #include #include -#include +#include #include #include -#include +#include #include #include diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c index c1f7249735897..58221b6e1465a 100644 --- a/arch/powerpc/sysdev/fsl_rmu.c +++ b/arch/powerpc/sysdev/fsl_rmu.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include "fsl_rio.h" diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 68709743450e3..e71b3ede147ea 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c index 1a3ac0b5dd89c..7b449cc51aefd 100644 --- a/arch/powerpc/sysdev/mpic_msgr.c +++ b/arch/powerpc/sysdev/mpic_msgr.c @@ -7,9 +7,10 @@ */ #include +#include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/sysdev/mpic_timer.c b/arch/powerpc/sysdev/mpic_timer.c index b2f0a73e8f930..7166e2e0baaf6 100644 --- a/arch/powerpc/sysdev/mpic_timer.c +++ b/arch/powerpc/sysdev/mpic_timer.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/of_rtc.c b/arch/powerpc/sysdev/of_rtc.c index 420f949b74851..2211937d3788e 100644 --- a/arch/powerpc/sysdev/of_rtc.c +++ b/arch/powerpc/sysdev/of_rtc.c @@ -5,10 +5,10 @@ * Copyright 2007 David Gibson , IBM Corporation. 
*/ #include -#include #include +#include #include -#include +#include #include #include diff --git a/arch/powerpc/sysdev/pmi.c b/arch/powerpc/sysdev/pmi.c index 9dabb50c36eb2..fcf8d1516210f 100644 --- a/arch/powerpc/sysdev/pmi.c +++ b/arch/powerpc/sysdev/pmi.c @@ -16,11 +16,11 @@ #include #include #include +#include #include #include -#include #include -#include +#include #include #include From 68877ff20a7f4f773069784cfe4f6fe9c7b9a841 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 25 Jul 2023 10:58:39 +1000 Subject: [PATCH 018/135] selftests/powerpc/ptrace: Explain why tests are skipped Many tests require specific hardware features/configurations that a typical machine might not have. As a result, it's common to see a test is skipped. But it is tedious to find out why a test is skipped when all it gives is the file location of the skip macro. Convert SKIP_IF() to SKIP_IF_MSG(), with appropriate descriptions of why the test is being skipped. This gives a general idea of why a test is skipped, which can be looked into further if it doesn't make sense. 
Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230725005841.28854-3-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/ptrace/child.h | 4 ++-- tools/testing/selftests/powerpc/ptrace/core-pkey.c | 2 +- tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c | 2 +- tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c | 2 +- tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c | 4 ++-- tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c | 2 +- tools/testing/selftests/powerpc/ptrace/ptrace-tar.c | 2 +- tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c | 4 ++-- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c | 4 ++-- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c | 4 ++-- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c | 4 ++-- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c | 4 ++-- tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c | 4 ++-- tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c | 4 ++-- tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c | 2 +- 15 files changed, 24 insertions(+), 24 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h index d7275b7b33dc9..df62ff0735f79 100644 --- a/tools/testing/selftests/powerpc/ptrace/child.h +++ b/tools/testing/selftests/powerpc/ptrace/child.h @@ -48,12 +48,12 @@ struct child_sync { } \ } while (0) -#define PARENT_SKIP_IF_UNSUPPORTED(x, sync) \ +#define PARENT_SKIP_IF_UNSUPPORTED(x, sync, msg) \ do { \ if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \ (sync)->parent_gave_up = true; \ prod_child(sync); \ - SKIP_IF(1); \ + SKIP_IF_MSG(1, msg); \ } \ } while (0) diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c index f6f8596ce8e12..f6da4cb30cd6d 100644 --- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c @@ 
-266,7 +266,7 @@ static int parent(struct shared_info *info, pid_t pid) * to the child. */ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3); - PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); + PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported"); PARENT_FAIL_IF(ret, &info->child_sync); info->amr = regs[0]; diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c index f75739bbad28c..e374c6b7ace68 100644 --- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c @@ -884,7 +884,7 @@ static int perf_hwbreak(void) { srand ( time(NULL) ); - SKIP_IF(!perf_breakpoint_supported()); + SKIP_IF_MSG(!perf_breakpoint_supported(), "Perf breakpoints not supported"); return runtest(); } diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index 1345e9b9af0fb..a16239277a6f2 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -603,7 +603,7 @@ static int ptrace_hwbreak(void) wait(NULL); get_dbginfo(child_pid, &dbginfo); - SKIP_IF(dbginfo.num_data_bps == 0); + SKIP_IF_MSG(dbginfo.num_data_bps == 0, "No data breakpoints present"); dawr = dawr_present(&dbginfo); run_tests(child_pid, &dbginfo, dawr); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c index 3344e74a97b4f..16c6536001244 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -641,10 +641,10 @@ static int ptrace_perf_hwbreak(void) wait(NULL); /* <-- child (SIGUSR1) */ get_dbginfo(child_pid, &dbginfo); - SKIP_IF(dbginfo.num_data_bps <= 1); + SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)"); ret = 
perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - SKIP_IF(ret < 0); + SKIP_IF_MSG(ret < 0, "perf_event_open syscall failed"); close(ret); ret = test(child_pid); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c index bc454f8991246..d89474377f11a 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c @@ -192,7 +192,7 @@ static int parent(struct shared_info *info, pid_t pid) * to the child. */ ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3); - PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync); + PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported"); PARENT_FAIL_IF(ret, &info->child_sync); info->amr1 = info->amr2 = regs[0]; diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c index 4436ca9d3caf8..14726c77a6ce0 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c @@ -79,7 +79,7 @@ int ptrace_tar(void) int ret, status; // TAR was added in v2.07 - SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_2_07), "TAR requires ISA 2.07 compatible hardware"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c index 5dc152b162df6..7c70d62587c2c 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c @@ -112,8 +112,8 @@ int ptrace_tm_gpr(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 
0777|IPC_CREAT); pid = fork(); if (pid < 0) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c index 458cc1a70ccfe..6c17ed0999692 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c @@ -118,8 +118,8 @@ int ptrace_tm_spd_gpr(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); if (pid < 0) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c index e112a34fbe59c..afd8dc2e20970 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c @@ -128,8 +128,8 @@ int ptrace_tm_spd_tar(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT); pid = fork(); if (pid == 0) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c index 40133d49fe39e..14d2fac8f237f 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c @@ -128,8 +128,8 @@ int ptrace_tm_spd_vsx(void) pid_t pid; int ret, status, i; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 
0777|IPC_CREAT); for (i = 0; i < 128; i++) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c index 880ba6a29a483..e64cdb04cecf7 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c @@ -113,8 +113,8 @@ int ptrace_tm_spr(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT); shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT); pid = fork(); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c index d0db6df0f0eae..3963d4b0429fb 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c @@ -116,8 +116,8 @@ int ptrace_tm_tar(void) pid_t pid; int ret, status; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); pid = fork(); if (pid == 0) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c index 4f05ce4fd2823..8c925d734a72d 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c @@ -112,8 +112,8 @@ int ptrace_tm_vsx(void) pid_t pid; int ret, status, i; - SKIP_IF(!have_htm()); - SKIP_IF(htm_is_synthetic()); + SKIP_IF_MSG(!have_htm(), "Don't have transactional memory"); + SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); 
for (i = 0; i < 128; i++) { diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c index cb9875f764ca2..11bc624574feb 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c @@ -61,7 +61,7 @@ int ptrace_vsx(void) pid_t pid; int ret, status, i; - SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX)); + SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_VSX), "Don't have VSX"); shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT); From fc6732a8556c1099b89f4777a96bd6a1ae5a4378 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 25 Jul 2023 10:58:40 +1000 Subject: [PATCH 019/135] selftests/powerpc/ptrace: Fix typo in pid_max search error pid_max_addr() searches for the 'pid_max' symbol in /proc/kallsyms, and prints an error if it cannot find it. The error message has a typo, calling it pix_max. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230725005841.28854-4-bgray@linux.ibm.com --- tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c index 16c6536001244..d8a9e95fc03de 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -46,7 +46,7 @@ static unsigned long pid_max_addr(void) return strtoul(addr, &c, 16); } fclose(fp); - printf("Could not find pix_max. Exiting..\n"); + printf("Could not find pid_max. 
Exiting..\n"); exit(EXIT_FAILURE); return -1; } From c3062ede9927053754ba27b280afe00b9b31b37a Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 25 Jul 2023 10:58:41 +1000 Subject: [PATCH 020/135] selftests/powerpc/ptrace: Declare test temporary variables as volatile While the target is volatile, the temporary variables used to access the target cast away the volatile. This is undefined behaviour, and a compiler may optimise away/reorder these accesses, breaking the test. This was observed with GCC 13.1.1, but it can be difficult to reproduce because of the dependency on compiler behaviour. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230725005841.28854-5-bgray@linux.ibm.com --- .../selftests/powerpc/ptrace/ptrace-hwbreak.c | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index a16239277a6f2..75d30d61ab0e8 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -64,26 +64,26 @@ static bool dawr_present(struct ppc_debug_info *dbginfo) static void write_var(int len) { - __u8 *pcvar; - __u16 *psvar; - __u32 *pivar; - __u64 *plvar; + volatile __u8 *pcvar; + volatile __u16 *psvar; + volatile __u32 *pivar; + volatile __u64 *plvar; switch (len) { case 1: - pcvar = (__u8 *)&glvar; + pcvar = (volatile __u8 *)&glvar; *pcvar = 0xff; break; case 2: - psvar = (__u16 *)&glvar; + psvar = (volatile __u16 *)&glvar; *psvar = 0xffff; break; case 4: - pivar = (__u32 *)&glvar; + pivar = (volatile __u32 *)&glvar; *pivar = 0xffffffff; break; case 8: - plvar = (__u64 *)&glvar; + plvar = (volatile __u64 *)&glvar; *plvar = 0xffffffffffffffffLL; break; } @@ -98,16 +98,16 @@ static void read_var(int len) switch (len) { case 1: - cvar = (__u8)glvar; + cvar = (volatile __u8)glvar; break; case 2: - svar = (__u16)glvar; + 
svar = (volatile __u16)glvar; break; case 4: - ivar = (__u32)glvar; + ivar = (volatile __u32)glvar; break; case 8: - lvar = (__u64)glvar; + lvar = (volatile __u64)glvar; break; } } From 54f30b83fe627453082f15d83d7820b28b2d24bb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 27 Jul 2023 14:26:50 +0200 Subject: [PATCH 021/135] powerpc: address missing-prototypes warnings There are a few warnings in powerpc64 defconfig builds after -Wmissing-prototypes gets promoted from W=1 to the default warning set: arch/powerpc/mm/book3s64/pgtable.c:422:6: error: no previous prototype for 'arch_report_meminfo' [-Werror=missing-prototypes] arch/powerpc/platforms/cell/ras.c:275:5: error: no previous prototype for 'cbe_sysreset_hack' [-Werror=missing-prototypes] arch/powerpc/platforms/cell/spu_manage.c:29:21: error: no previous prototype for 'spu_devnode' [-Werror=missing-prototypes] arch/powerpc/platforms/pasemi/time.c:12:17: error: no previous prototype for 'pas_get_boot_time' [-Werror=missing-prototypes] arch/powerpc/platforms/powermac/feature.c:1532:13: error: no previous prototype for 'g5_phy_disable_cpu1' [-Werror=missing-prototypes] arch/powerpc/platforms/86xx/pic.c:28:13: error: no previous prototype for 'mpc86xx_init_irq' [-Werror=missing-prototypes] drivers/pci/pci-sysfs.c:936:13: error: no previous prototype for 'pci_adjust_legacy_attr' [-Werror=missing-prototypes] Address these by including the right header files or marking the functions static. The audit.c one is a bit tricky since compat_audit.h cannot include regular kernel headers that have conflicting types on 32-bit powerpc. 
Signed-off-by: Arnd Bergmann [mpe: Drop change to __vmemmap_free() which only exists in mm] Signed-off-by: Michael Ellerman Link: https://msgid.link/20230727122720.2558065-1-arnd@kernel.org --- arch/powerpc/include/asm/pci.h | 3 ++- arch/powerpc/kernel/audit.c | 3 ++- arch/powerpc/kernel/audit_32.h | 7 +++++++ arch/powerpc/kernel/compat_audit.c | 2 ++ arch/powerpc/mm/book3s64/pgtable.c | 1 + arch/powerpc/platforms/86xx/pic.c | 2 ++ arch/powerpc/platforms/cell/ras.c | 2 +- arch/powerpc/platforms/cell/spu_manage.c | 1 + arch/powerpc/platforms/pasemi/pasemi.h | 1 + arch/powerpc/platforms/pasemi/time.c | 2 ++ arch/powerpc/platforms/powermac/feature.c | 2 ++ 11 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/kernel/audit_32.h diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 289f1ec85bc54..f5078a7dd85a5 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -82,7 +82,8 @@ extern int pci_legacy_write(struct pci_bus *bus, loff_t port, u32 val, extern int pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, enum pci_mmap_state mmap_state); - +extern void pci_adjust_legacy_attr(struct pci_bus *bus, + enum pci_mmap_state mmap_type); #define HAVE_PCI_LEGACY 1 extern void pcibios_claim_one_bus(struct pci_bus *b); diff --git a/arch/powerpc/kernel/audit.c b/arch/powerpc/kernel/audit.c index 1bcfca5fdf676..92298d6a3a37a 100644 --- a/arch/powerpc/kernel/audit.c +++ b/arch/powerpc/kernel/audit.c @@ -4,6 +4,8 @@ #include #include +#include "audit_32.h" + static unsigned dir_class[] = { #include ~0U @@ -41,7 +43,6 @@ int audit_classify_arch(int arch) int audit_classify_syscall(int abi, unsigned syscall) { #ifdef CONFIG_PPC64 - extern int ppc32_classify_syscall(unsigned); if (abi == AUDIT_ARCH_PPC) return ppc32_classify_syscall(syscall); #endif diff --git a/arch/powerpc/kernel/audit_32.h b/arch/powerpc/kernel/audit_32.h new file mode 100644 index 
0000000000000..c6c79c3041ab2 --- /dev/null +++ b/arch/powerpc/kernel/audit_32.h @@ -0,0 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef __AUDIT_32_H__ +#define __AUDIT_32_H__ + +extern int ppc32_classify_syscall(unsigned); + +#endif diff --git a/arch/powerpc/kernel/compat_audit.c b/arch/powerpc/kernel/compat_audit.c index d92ffe4e5dc1c..57b38c592b9f3 100644 --- a/arch/powerpc/kernel/compat_audit.c +++ b/arch/powerpc/kernel/compat_audit.c @@ -3,6 +3,8 @@ #include #include +#include "audit_32.h" + unsigned ppc32_dir_class[] = { #include ~0U diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 85c84e89e3eaf..2f2872986b09a 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/86xx/pic.c b/arch/powerpc/platforms/86xx/pic.c index 5c7bd925653d4..9ca36de235321 100644 --- a/arch/powerpc/platforms/86xx/pic.c +++ b/arch/powerpc/platforms/86xx/pic.c @@ -12,6 +12,8 @@ #include #include +#include "mpc86xx.h" + #ifdef CONFIG_PPC_I8259 static void mpc86xx_8259_cascade(struct irq_desc *desc) { diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 98db63b72d56b..f6b87926530cc 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -22,7 +22,7 @@ #include #include "ras.h" - +#include "pervasive.h" static void dump_fir(int cpu) { diff --git a/arch/powerpc/platforms/cell/spu_manage.c b/arch/powerpc/platforms/cell/spu_manage.c index 74567b32c48c2..f464a1f2e5681 100644 --- a/arch/powerpc/platforms/cell/spu_manage.c +++ b/arch/powerpc/platforms/cell/spu_manage.c @@ -25,6 +25,7 @@ #include "spufs/spufs.h" #include "interrupt.h" +#include "spu_priv1_mmio.h" struct device_node *spu_devnode(struct spu *spu) { diff --git a/arch/powerpc/platforms/pasemi/pasemi.h b/arch/powerpc/platforms/pasemi/pasemi.h index 3f277a200fd87..018c30665e1b3 100644 
--- a/arch/powerpc/platforms/pasemi/pasemi.h +++ b/arch/powerpc/platforms/pasemi/pasemi.h @@ -4,6 +4,7 @@ extern time64_t pas_get_boot_time(void); extern void pas_pci_init(void); +struct pci_dev; extern void pas_pci_irq_fixup(struct pci_dev *dev); extern void pas_pci_dma_dev_setup(struct pci_dev *dev); diff --git a/arch/powerpc/platforms/pasemi/time.c b/arch/powerpc/platforms/pasemi/time.c index ad721882c8b6f..70ac6db027d0e 100644 --- a/arch/powerpc/platforms/pasemi/time.c +++ b/arch/powerpc/platforms/pasemi/time.c @@ -9,6 +9,8 @@ #include +#include "pasemi.h" + time64_t __init pas_get_boot_time(void) { /* Let's just return a fake date right now */ diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index ed58928469b5b..ed472b797e28a 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -37,6 +37,8 @@ #include #include +#include "pmac.h" + #undef DEBUG_FEATURE #ifdef DEBUG_FEATURE From bbfa509d632946578d4f19aa2cedf1ca2f34565d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 26 Jul 2023 10:14:42 +0200 Subject: [PATCH 022/135] powerpc/ep8248e: Mark driver as non removable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Instead of resorting to BUG() ensure that the driver isn't unbound by suppressing its bind and unbind sysfs attributes. As the driver is built-in there is no way to remove a device once bound. As a nice side effect this allows to drop the remove function. 
Signed-off-by: Uwe Kleine-König Signed-off-by: Michael Ellerman Link: https://msgid.link/20230726081442.461026-1-u.kleine-koenig@pengutronix.de --- arch/powerpc/platforms/82xx/ep8248e.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 16808536f7888..4bfa1a95e1555 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -141,12 +141,6 @@ static int ep8248e_mdio_probe(struct platform_device *ofdev) return ret; } -static int ep8248e_mdio_remove(struct platform_device *ofdev) -{ - BUG(); - return 0; -} - static const struct of_device_id ep8248e_mdio_match[] = { { .compatible = "fsl,ep8248e-mdio-bitbang", @@ -158,9 +152,9 @@ static struct platform_driver ep8248e_mdio_driver = { .driver = { .name = "ep8248e-mdio-bitbang", .of_match_table = ep8248e_mdio_match, + .suppress_bind_attrs = true, }, .probe = ep8248e_mdio_probe, - .remove = ep8248e_mdio_remove, }; struct cpm_pin { From 7f96539437eafec8fd062fb13f31cf53251ea18d Mon Sep 17 00:00:00 2001 From: Laurent Dufour Date: Tue, 25 Jul 2023 15:27:59 +0200 Subject: [PATCH 023/135] powerpc/kexec: fix minor typo Function name in the descriptor was not correct. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202307251721.bUGcsCeQ-lkp@intel.com/ Signed-off-by: Laurent Dufour Signed-off-by: Michael Ellerman Link: https://msgid.link/20230725132759.53975-1-ldufour@linux.ibm.com --- arch/powerpc/kexec/file_load_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 7e69be55451a7..17534daa3c48c 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -933,9 +933,9 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, } /** - * get_cpu_node_size - Compute the size of a CPU node in the FDT. 
- * This should be done only once and the value is stored in - * a static variable. + * cpu_node_size - Compute the size of a CPU node in the FDT. + * This should be done only once and the value is stored in + * a static variable. * Returns the max size of a CPU node in the FDT. */ static unsigned int cpu_node_size(void) From 3b3a4d0fe542b8c2295cf934305b45a14e224beb Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 5 Jul 2023 16:51:41 +0200 Subject: [PATCH 024/135] powerpc/pseries: Initialise CPU hotplug callbacks earlier As part of the generic HOTPLUG_SMT code, there is support for disabling secondary SMT threads at boot time, by passing "nosmt" on the kernel command line. The way that is implemented is the secondary threads are brought partly online, and then taken back offline again. That is done to support x86 CPUs needing certain initialisation done on all threads. However powerpc has similar needs, see commit d70a54e2d085 ("powerpc/powernv: Ignore smt-enabled on Power8 and later"). For that to work the powerpc CPU hotplug callbacks need to be registered before secondary CPUs are brought online, otherwise __cpu_disable() fails due to smp_ops->cpu_disable being NULL. So split the basic initialisation into pseries_cpu_hotplug_init() which can be called early from setup_arch(). The DLPAR related initialisation can still be done later, because it needs to do allocations. 
Signed-off-by: Michael Ellerman Link: https://msgid.link/20230705145143.40545-9-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 22 ++++++++++++-------- arch/powerpc/platforms/pseries/pseries.h | 2 ++ arch/powerpc/platforms/pseries/setup.c | 2 ++ 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 1a3cb313976a4..61fb7cb008803 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -845,15 +845,9 @@ static struct notifier_block pseries_smp_nb = { .notifier_call = pseries_smp_notifier, }; -static int __init pseries_cpu_hotplug_init(void) +void __init pseries_cpu_hotplug_init(void) { int qcss_tok; - unsigned int node; - -#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE - ppc_md.cpu_probe = dlpar_cpu_probe; - ppc_md.cpu_release = dlpar_cpu_release; -#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ rtas_stop_self_token = rtas_function_token(RTAS_FN_STOP_SELF); qcss_tok = rtas_function_token(RTAS_FN_QUERY_CPU_STOPPED_STATE); @@ -862,12 +856,22 @@ static int __init pseries_cpu_hotplug_init(void) qcss_tok == RTAS_UNKNOWN_SERVICE) { printk(KERN_INFO "CPU Hotplug not supported by firmware " "- disabling.\n"); - return 0; + return; } smp_ops->cpu_offline_self = pseries_cpu_offline_self; smp_ops->cpu_disable = pseries_cpu_disable; smp_ops->cpu_die = pseries_cpu_die; +} + +static int __init pseries_dlpar_init(void) +{ + unsigned int node; + +#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE + ppc_md.cpu_probe = dlpar_cpu_probe; + ppc_md.cpu_release = dlpar_cpu_release; +#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */ /* Processors can be added/removed only on LPAR */ if (firmware_has_feature(FW_FEATURE_LPAR)) { @@ -886,4 +890,4 @@ static int __init pseries_cpu_hotplug_init(void) return 0; } -machine_arch_initcall(pseries, pseries_cpu_hotplug_init); +machine_arch_initcall(pseries, pseries_dlpar_init); diff --git 
a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index f8bce40ebd0ce..f8893ba46e83a 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -75,11 +75,13 @@ static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog) #ifdef CONFIG_HOTPLUG_CPU int dlpar_cpu(struct pseries_hp_errorlog *hp_elog); +void pseries_cpu_hotplug_init(void); #else static inline int dlpar_cpu(struct pseries_hp_errorlog *hp_elog) { return -EOPNOTSUPP; } +static inline void pseries_cpu_hotplug_init(void) { } #endif /* PCI root bridge prepare function override for pseries */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index e2a57cfa6c837..41451b76c6e51 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -816,6 +816,8 @@ static void __init pSeries_setup_arch(void) /* Discover PIC type and setup ppc_md accordingly */ smp_init_pseries(); + // Setup CPU hotplug callbacks + pseries_cpu_hotplug_init(); if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE)) if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) From 73c58e7e14126466701705820c4eb47daddc357d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 5 Jul 2023 16:51:42 +0200 Subject: [PATCH 025/135] powerpc: Add HOTPLUG_SMT support Add support for HOTPLUG_SMT, which enables the generic sysfs SMT support files in /sys/devices/system/cpu/smt, as well as the "nosmt" boot parameter. Implement the recently added hooks to allow partial SMT states, allow any number of threads per core. Tie the config symbol to HOTPLUG_CPU, which enables it on the major platforms that support SMT. If there are other platforms that want the SMT support that can be tweaked in future. 
Signed-off-by: Michael Ellerman [ldufour: remove topology_smt_supported] [ldufour: remove topology_smt_threads_supported] [ldufour: select CONFIG_SMT_NUM_THREADS_DYNAMIC] [ldufour: update kernel-parameters.txt] Signed-off-by: Laurent Dufour Link: https://msgid.link/20230705145143.40545-10-ldufour@linux.ibm.com --- Documentation/admin-guide/kernel-parameters.txt | 4 ++-- arch/powerpc/Kconfig | 2 ++ arch/powerpc/include/asm/topology.h | 15 +++++++++++++++ arch/powerpc/kernel/smp.c | 8 +++++++- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a1457995fd41c..cac4643867792 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3853,10 +3853,10 @@ nosmp [SMP] Tells an SMP kernel to act as a UP kernel, and disable the IO APIC. legacy for "maxcpus=0". - nosmt [KNL,MIPS,S390] Disable symmetric multithreading (SMT). + nosmt [KNL,MIPS,PPC,S390] Disable symmetric multithreading (SMT). Equivalent to smt=1. - [KNL,X86] Disable symmetric multithreading (SMT). + [KNL,X86,PPC] Disable symmetric multithreading (SMT). nosmt=force: Force disable SMT, cannot be undone via the sysfs control file. 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 0b1172cbeccb3..aef38d2ca542f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -272,6 +272,8 @@ config PPC select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_VIRT_CPU_ACCOUNTING_GEN + select HOTPLUG_SMT if HOTPLUG_CPU + select SMT_NUM_THREADS_DYNAMIC select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE select IOMMU_HELPER if PPC64 select IRQ_DOMAIN diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 8a4d4f4d97495..f4e6f2dd04b73 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -143,5 +143,20 @@ static inline int cpu_to_coregroup_id(int cpu) #endif #endif +#ifdef CONFIG_HOTPLUG_SMT +#include +#include + +static inline bool topology_is_primary_thread(unsigned int cpu) +{ + return cpu == cpu_first_thread_sibling(cpu); +} + +static inline bool topology_smt_thread_allowed(unsigned int cpu) +{ + return cpu_thread_in_core(cpu) < cpu_smt_num_threads; +} +#endif + #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_TOPOLOGY_H */ diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index fbbb695bae3d2..b9f0f8f11c376 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -1087,7 +1087,7 @@ static int __init init_big_cores(void) void __init smp_prepare_cpus(unsigned int max_cpus) { - unsigned int cpu; + unsigned int cpu, num_threads; DBG("smp_prepare_cpus\n"); @@ -1154,6 +1154,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (smp_ops && smp_ops->probe) smp_ops->probe(); + + // Initialise the generic SMT topology support + num_threads = 1; + if (smt_enabled_at_boot) + num_threads = smt_enabled_at_boot; + cpu_smt_set_num_threads(num_threads, threads_per_core); } void smp_prepare_boot_cpu(void) From d1099e2276df1d8dd4037552c2f34eb4c4df4a75 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 5 Jul 2023 16:51:43 +0200 Subject: [PATCH 
026/135] powerpc/pseries: Honour current SMT state when DLPAR onlining CPUs Integrate with the generic SMT support, so that when a CPU is DLPAR onlined it is brought up with the correct SMT mode. Signed-off-by: Michael Ellerman Link: https://msgid.link/20230705145143.40545-11-ldufour@linux.ibm.com --- arch/powerpc/platforms/pseries/hotplug-cpu.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index 61fb7cb008803..e62835a12d73f 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -398,6 +398,14 @@ static int dlpar_online_cpu(struct device_node *dn) for_each_present_cpu(cpu) { if (get_hard_smp_processor_id(cpu) != thread) continue; + + if (!topology_is_primary_thread(cpu)) { + if (cpu_smt_control != CPU_SMT_ENABLED) + break; + if (!topology_smt_thread_allowed(cpu)) + break; + } + cpu_maps_update_done(); find_and_update_cpu_nid(cpu); rc = device_online(get_cpu_device(cpu)); From 0b5e06e9cb156e7e97bfb4e1ebf6acd62497eaf5 Mon Sep 17 00:00:00 2001 From: Yuan Tan Date: Wed, 2 Aug 2023 21:41:30 +0800 Subject: [PATCH 027/135] powerpc/pmac32: enable serial options by default in defconfig Serial is a critical feature for logging and debugging, and the other architectures enable serial by default. Let's enable CONFIG_SERIAL_PMACZILOG and CONFIG_SERIAL_PMACZILOG_CONSOLE by default. 
Signed-off-by: Yuan Tan Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/bb7b5f9958b3e3a20f6573ff7ce7c5dc566e7e32.1690982937.git.tanyuan@tinylab.org --- arch/powerpc/configs/pmac32_defconfig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig index 019163c2571e6..3aae79afb9d90 100644 --- a/arch/powerpc/configs/pmac32_defconfig +++ b/arch/powerpc/configs/pmac32_defconfig @@ -176,8 +176,9 @@ CONFIG_MOUSE_APPLETOUCH=y # CONFIG_SERIO_I8042 is not set # CONFIG_SERIO_SERPORT is not set CONFIG_SERIAL_8250=m -CONFIG_SERIAL_PMACZILOG=m +CONFIG_SERIAL_PMACZILOG=y CONFIG_SERIAL_PMACZILOG_TTYS=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y CONFIG_NVRAM=y CONFIG_I2C_CHARDEV=m CONFIG_APM_POWER=y From 0f7ce21ab5209426b00636636a5f2d9008738654 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 9 Aug 2023 15:10:08 +0200 Subject: [PATCH 028/135] powerpc: mark more local variables as volatile A while ago I created a2305e3de8193 ("powerpc: mark local variables around longjmp as volatile") in order to allow building powerpc with -Wextra enabled on gcc-11. I tried this again with gcc-13 and found two more of the same issues, presumably based on slightly different optimization paths being taken here: arch/powerpc/xmon/xmon.c:3306:27: error: variable 'mm' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] arch/powerpc/kexec/crash.c:353:22: error: variable 'i' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered] I checked a bunch of randconfigs and found only these two, so just address them the same way as the others. 
Signed-off-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://msgid.link/20230809131024.2039647-1-arnd@kernel.org --- arch/powerpc/kexec/crash.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c index 252724ed666a3..ef5c2d25ec397 100644 --- a/arch/powerpc/kexec/crash.c +++ b/arch/powerpc/kexec/crash.c @@ -350,7 +350,7 @@ EXPORT_SYMBOL(crash_shutdown_unregister); void default_machine_crash_shutdown(struct pt_regs *regs) { - unsigned int i; + volatile unsigned int i; int (*old_handler)(struct pt_regs *regs); if (TRAP(regs) == INTERRUPT_SYSTEM_RESET) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index fae747cc57d2d..0f3f93c82d7e0 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3303,7 +3303,7 @@ static void show_pte(unsigned long addr) { unsigned long tskv = 0; struct task_struct *volatile tsk = NULL; - struct mm_struct *mm; + struct mm_struct *volatile mm; pgd_t *pgdp; p4d_t *p4dp; pud_t *pudp; From ef73dcaa31217c79adc548bf9045afb40ac75b82 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 9 Aug 2023 15:10:09 +0200 Subject: [PATCH 029/135] powerpc: xmon: remove unused variables Randconfig testing with W=1 showed up these warnings that I'd like to enable by default: arch/powerpc/xmon/xmon.c: In function 'dump_tlb_book3e': arch/powerpc/xmon/xmon.c:3833:42: error: variable 'lrat' set but not used [-Werror=unused-but-set-variable] 3833 | int i, tlb, ntlbs, pidsz, lpidsz, rasz, lrat = 0; | ^~~~ arch/powerpc/xmon/xmon.c:3831:23: error: variable 'lpidmask' set but not used [-Werror=unused-but-set-variable] 3831 | u32 mmucfg, pidmask, lpidmask; | ^~~~~~~~ arch/powerpc/xmon/xmon.c:3831:14: error: variable 'pidmask' set but not used [-Werror=unused-but-set-variable] 3831 | u32 mmucfg, pidmask, lpidmask; | ^~~~~~~ Just remove these as they have been unused since the code was added in 2010. 
Fixes: 03247157f7391 ("powerpc/book3e: Add TLB dump in xmon for Book3E") Signed-off-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://msgid.link/20230809131024.2039647-2-arnd@kernel.org --- arch/powerpc/xmon/xmon.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 0f3f93c82d7e0..78453b9b1ba0e 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3828,9 +3828,9 @@ static void dump_tlb_44x(void) #ifdef CONFIG_PPC_BOOK3E_64 static void dump_tlb_book3e(void) { - u32 mmucfg, pidmask, lpidmask; + u32 mmucfg; u64 ramask; - int i, tlb, ntlbs, pidsz, lpidsz, rasz, lrat = 0; + int i, tlb, ntlbs, pidsz, lpidsz, rasz; int mmu_version; static const char *pgsz_names[] = { " 1K", @@ -3874,12 +3874,8 @@ static void dump_tlb_book3e(void) pidsz = ((mmucfg >> 6) & 0x1f) + 1; lpidsz = (mmucfg >> 24) & 0xf; rasz = (mmucfg >> 16) & 0x7f; - if ((mmu_version > 1) && (mmucfg & 0x10000)) - lrat = 1; printf("Book3E MMU MAV=%d.0,%d TLBs,%d-bit PID,%d-bit LPID,%d-bit RA\n", mmu_version, ntlbs, pidsz, lpidsz, rasz); - pidmask = (1ul << pidsz) - 1; - lpidmask = (1ul << lpidsz) - 1; ramask = (1ull << rasz) - 1; for (tlb = 0; tlb < ntlbs; tlb++) { From 4a9dd8f292efd614f0a18452e6474fe19ae17b47 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 9 Aug 2023 10:01:43 +0200 Subject: [PATCH 030/135] powerpc/radix: Move some functions into #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE With skiboot_defconfig, Clang reports: CC arch/powerpc/mm/book3s64/radix_tlb.o arch/powerpc/mm/book3s64/radix_tlb.c:419:20: error: unused function '_tlbie_pid_lpid' [-Werror,-Wunused-function] static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, ^ arch/powerpc/mm/book3s64/radix_tlb.c:663:20: error: unused function '_tlbie_va_range_lpid' [-Werror,-Wunused-function] static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, ^ This is because those functions are only called 
from functions enclosed in a #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE Move below functions inside that #ifdef * __tlbie_pid_lpid(unsigned long pid, * __tlbie_va_lpid(unsigned long va, unsigned long pid, * fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) * _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, * fixup_tlbie_va_range_lpid(unsigned long va, * __tlbie_va_range_lpid(unsigned long start, unsigned long end, * _tlbie_va_range_lpid(unsigned long start, unsigned long end, Fixes: f0c6fbbb9050 ("KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE") Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202307260802.Mjr99P5O-lkp@intel.com/ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/3d72efd39f986ee939d068af69fdce28bd600766.1691568093.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s64/radix_tlb.c | 240 ++++++++++++++------------- 1 file changed, 121 insertions(+), 119 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index dd127087fee8a..6f48fffe8b9c9 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -127,21 +127,6 @@ static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) trace_tlbie(0, 0, rb, rs, ric, prs, r); } -static __always_inline void __tlbie_pid_lpid(unsigned long pid, - unsigned long lpid, - unsigned long ric) -{ - unsigned long rb, rs, prs, r; - - rb = PPC_BIT(53); /* IS = 1 */ - rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); - prs = 1; /* process scoped */ - r = 1; /* radix format */ - - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); -} static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) { unsigned long rb,rs,prs,r; @@ -202,23 +187,6 @@ static __always_inline void __tlbie_va(unsigned long va, 
unsigned long pid, trace_tlbie(0, 0, rb, rs, ric, prs, r); } -static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, - unsigned long lpid, - unsigned long ap, unsigned long ric) -{ - unsigned long rb, rs, prs, r; - - rb = va & ~(PPC_BITMASK(52, 63)); - rb |= ap << PPC_BITLSHIFT(58); - rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); - prs = 1; /* process scoped */ - r = 1; /* radix format */ - - asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) - : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); - trace_tlbie(0, 0, rb, rs, ric, prs, r); -} - static __always_inline void __tlbie_lpid_va(unsigned long va, unsigned long lpid, unsigned long ap, unsigned long ric) { @@ -264,22 +232,6 @@ static inline void fixup_tlbie_va_range(unsigned long va, unsigned long pid, } } -static inline void fixup_tlbie_va_range_lpid(unsigned long va, - unsigned long pid, - unsigned long lpid, - unsigned long ap) -{ - if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { - asm volatile("ptesync" : : : "memory"); - __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); - } - - if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { - asm volatile("ptesync" : : : "memory"); - __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); - } -} - static inline void fixup_tlbie_pid(unsigned long pid) { /* @@ -299,26 +251,6 @@ static inline void fixup_tlbie_pid(unsigned long pid) } } -static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) -{ - /* - * We can use any address for the invalidation, pick one which is - * probably unused as an optimisation. 
- */ - unsigned long va = ((1UL << 52) - 1); - - if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { - asm volatile("ptesync" : : : "memory"); - __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); - } - - if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { - asm volatile("ptesync" : : : "memory"); - __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), - RIC_FLUSH_TLB); - } -} - static inline void fixup_tlbie_lpid_va(unsigned long va, unsigned long lpid, unsigned long ap) { @@ -416,31 +348,6 @@ static inline void _tlbie_pid(unsigned long pid, unsigned long ric) asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, - unsigned long ric) -{ - asm volatile("ptesync" : : : "memory"); - - /* - * Workaround the fact that the "ric" argument to __tlbie_pid - * must be a compile-time contraint to match the "i" constraint - * in the asm statement. - */ - switch (ric) { - case RIC_FLUSH_TLB: - __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); - fixup_tlbie_pid_lpid(pid, lpid); - break; - case RIC_FLUSH_PWC: - __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); - break; - case RIC_FLUSH_ALL: - default: - __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); - fixup_tlbie_pid_lpid(pid, lpid); - } - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); -} struct tlbiel_pid { unsigned long pid; unsigned long ric; @@ -566,20 +473,6 @@ static inline void __tlbie_va_range(unsigned long start, unsigned long end, fixup_tlbie_va_range(addr - page_size, pid, ap); } -static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, - unsigned long pid, unsigned long lpid, - unsigned long page_size, - unsigned long psize) -{ - unsigned long addr; - unsigned long ap = mmu_get_ap(psize); - - for (addr = start; addr < end; addr += page_size) - __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); - - fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); -} - static __always_inline void _tlbie_va(unsigned long va, unsigned long 
pid, unsigned long psize, unsigned long ric) { @@ -660,18 +553,6 @@ static inline void _tlbie_va_range(unsigned long start, unsigned long end, asm volatile("eieio; tlbsync; ptesync": : :"memory"); } -static inline void _tlbie_va_range_lpid(unsigned long start, unsigned long end, - unsigned long pid, unsigned long lpid, - unsigned long page_size, - unsigned long psize, bool also_pwc) -{ - asm volatile("ptesync" : : : "memory"); - if (also_pwc) - __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); - __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); -} - static inline void _tlbiel_va_range_multicast(struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long pid, unsigned long page_size, @@ -1514,6 +1395,127 @@ void radix__flush_tlb_all(void) } #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +static __always_inline void __tlbie_pid_lpid(unsigned long pid, + unsigned long lpid, + unsigned long ric) +{ + unsigned long rb, rs, prs, r; + + rb = PPC_BIT(53); /* IS = 1 */ + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + +static __always_inline void __tlbie_va_lpid(unsigned long va, unsigned long pid, + unsigned long lpid, + unsigned long ap, unsigned long ric) +{ + unsigned long rb, rs, prs, r; + + rb = va & ~(PPC_BITMASK(52, 63)); + rb |= ap << PPC_BITLSHIFT(58); + rs = (pid << PPC_BITLSHIFT(31)) | (lpid & ~(PPC_BITMASK(0, 31))); + prs = 1; /* process scoped */ + r = 1; /* radix format */ + + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1) + : : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory"); + trace_tlbie(0, 0, rb, rs, ric, prs, r); +} + +static inline void fixup_tlbie_pid_lpid(unsigned long pid, unsigned long lpid) +{ + /* + * We can use any address for the 
invalidation, pick one which is + * probably unused as an optimisation. + */ + unsigned long va = ((1UL << 52) - 1); + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_va_lpid(va, pid, lpid, mmu_get_ap(MMU_PAGE_64K), + RIC_FLUSH_TLB); + } +} + +static inline void _tlbie_pid_lpid(unsigned long pid, unsigned long lpid, + unsigned long ric) +{ + asm volatile("ptesync" : : : "memory"); + + /* + * Workaround the fact that the "ric" argument to __tlbie_pid + * must be a compile-time contraint to match the "i" constraint + * in the asm statement. + */ + switch (ric) { + case RIC_FLUSH_TLB: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_TLB); + fixup_tlbie_pid_lpid(pid, lpid); + break; + case RIC_FLUSH_PWC: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); + break; + case RIC_FLUSH_ALL: + default: + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_ALL); + fixup_tlbie_pid_lpid(pid, lpid); + } + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} + +static inline void fixup_tlbie_va_range_lpid(unsigned long va, + unsigned long pid, + unsigned long lpid, + unsigned long ap) +{ + if (cpu_has_feature(CPU_FTR_P9_TLBIE_ERAT_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_pid_lpid(0, lpid, RIC_FLUSH_TLB); + } + + if (cpu_has_feature(CPU_FTR_P9_TLBIE_STQ_BUG)) { + asm volatile("ptesync" : : : "memory"); + __tlbie_va_lpid(va, pid, lpid, ap, RIC_FLUSH_TLB); + } +} + +static inline void __tlbie_va_range_lpid(unsigned long start, unsigned long end, + unsigned long pid, unsigned long lpid, + unsigned long page_size, + unsigned long psize) +{ + unsigned long addr; + unsigned long ap = mmu_get_ap(psize); + + for (addr = start; addr < end; addr += page_size) + __tlbie_va_lpid(addr, pid, lpid, ap, RIC_FLUSH_TLB); + + fixup_tlbie_va_range_lpid(addr - page_size, pid, lpid, ap); +} + +static inline 
void _tlbie_va_range_lpid(unsigned long start, unsigned long end, + unsigned long pid, unsigned long lpid, + unsigned long page_size, + unsigned long psize, bool also_pwc) +{ + asm volatile("ptesync" : : : "memory"); + if (also_pwc) + __tlbie_pid_lpid(pid, lpid, RIC_FLUSH_PWC); + __tlbie_va_range_lpid(start, end, pid, lpid, page_size, psize); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); +} + /* * Performs process-scoped invalidations for a given LPID * as part of H_RPT_INVALIDATE hcall. From 506e550a7da9e995bea3bd585db591068f98b2bf Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 9 Aug 2023 17:07:40 -0700 Subject: [PATCH 031/135] powerpc/pseries: PLPKS: undo kernel-doc comment notation Don't use kernel-doc "/**" comment format for non-kernel-doc comments. This prevents a kernel-doc warning: arch/powerpc/platforms/pseries/plpks.c:186: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst * Label is combination of label attributes + name. Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Link: lore.kernel.org/r/202308040430.GxmPAnwZ-lkp@intel.com Signed-off-by: Michael Ellerman Link: https://msgid.link/20230810000740.23756-1-rdunlap@infradead.org --- arch/powerpc/platforms/pseries/plpks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c index b0658ea3eccbf..2d40304eb6c16 100644 --- a/arch/powerpc/platforms/pseries/plpks.c +++ b/arch/powerpc/platforms/pseries/plpks.c @@ -194,7 +194,7 @@ static struct plpks_auth *construct_auth(u8 consumer) return auth; } -/** +/* * Label is combination of label attributes + name. * Label attributes are used internally by kernel and not exposed to the user. 
*/ From cd50430ceb3598957803934068531a274349bcf9 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 10 Aug 2023 16:19:24 +0200 Subject: [PATCH 032/135] macintosh/ams: mark ams_init() static This is the module init function, which by definition is used only locally, so mark it static to avoid a warning: drivers/macintosh/ams/ams-core.c:179:12: error: no previous prototype for 'ams_init' [-Werror=missing-prototypes] Signed-off-by: Arnd Bergmann Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20230810141947.1236730-7-arnd@kernel.org --- drivers/macintosh/ams/ams-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/macintosh/ams/ams-core.c b/drivers/macintosh/ams/ams-core.c index 877e8cb231283..c978b4272daa5 100644 --- a/drivers/macintosh/ams/ams-core.c +++ b/drivers/macintosh/ams/ams-core.c @@ -176,7 +176,7 @@ int ams_sensor_attach(void) return result; } -int __init ams_init(void) +static int __init ams_init(void) { struct device_node *np; From 075a88d5eb2806712c64bed98c30b6890ec30311 Mon Sep 17 00:00:00 2001 From: Zheng Zengkai Date: Fri, 11 Aug 2023 18:20:39 +0800 Subject: [PATCH 033/135] ocxl: Use pci_dev_id() to simplify the code PCI core API pci_dev_id() can be used to get the BDF number for a pci device. We don't need to compose it manually. Use pci_dev_id() to simplify the code a little bit. 
Signed-off-by: Zheng Zengkai Acked-by: Andrew Donnellan Acked-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://msgid.link/20230811102039.17257-1-zhengzengkai@huawei.com --- arch/powerpc/platforms/powernv/ocxl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c index 629067781cec0..64a9c7125c294 100644 --- a/arch/powerpc/platforms/powernv/ocxl.c +++ b/arch/powerpc/platforms/powernv/ocxl.c @@ -449,7 +449,7 @@ int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, if (!data) return -ENOMEM; - bdfn = (dev->bus->number << 8) | dev->devfn; + bdfn = pci_dev_id(dev); rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem), PE_mask); if (rc) { From 60bc069c433fc89caa97927b1636401a0e647f67 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2023 08:04:38 +0200 Subject: [PATCH 034/135] powerpc/include: Remove unneeded #include tqm8xx_setup.c and fs_enet.h don't use any items provided by fs_pd.h Remove unneeded #include Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/b056c4e986a4a7707fc1994304c34f7bd15d6871.1691474658.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/8xx/tqm8xx_setup.c | 1 - drivers/net/ethernet/freescale/fs_enet/fs_enet.h | 1 - 2 files changed, 2 deletions(-) diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index 7d8eb50bb9cd3..c422262ba27b9 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -39,7 +39,6 @@ #include #include #include -#include #include #include "mpc8xx.h" diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index cb419aef8d1b8..aad96cb2ab4e9 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -10,7 +10,6 @@ #include 
#include -#include #ifdef CONFIG_CPM1 #include From e6e077cb2aa4ffb8b320f9a1464f29a21986a901 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2023 08:04:39 +0200 Subject: [PATCH 035/135] powerpc/include: Declare mpc8xx_immr in 8xx_immap.h Do the same as for cpm2_immr: declare it at the same place as its type immap_t, that is in 8xx_immap.h instead of fs_pd.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/62d490b65899c2f2667ca7045c5f0fad9cbda458.1691474658.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/8xx_immap.h | 2 ++ arch/powerpc/include/asm/fs_pd.h | 2 -- arch/powerpc/platforms/8xx/adder875.c | 2 +- arch/powerpc/platforms/8xx/mpc86xads_setup.c | 1 - arch/powerpc/platforms/8xx/mpc885ads_setup.c | 1 - 5 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/8xx_immap.h b/arch/powerpc/include/asm/8xx_immap.h index bdf0563ba4238..f9cac46a95cb3 100644 --- a/arch/powerpc/include/asm/8xx_immap.h +++ b/arch/powerpc/include/asm/8xx_immap.h @@ -560,5 +560,7 @@ typedef struct immap { cpm8xx_t im_cpm; /* Communication processor */ } immap_t; +extern immap_t __iomem *mpc8xx_immr; + #endif /* __IMMAP_8XX__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h index 8def56ec05c60..2b2b52b7451dc 100644 --- a/arch/powerpc/include/asm/fs_pd.h +++ b/arch/powerpc/include/asm/fs_pd.h @@ -29,8 +29,6 @@ #ifdef CONFIG_PPC_8xx #include -extern immap_t __iomem *mpc8xx_immr; - #define immr_map(member) (&mpc8xx_immr->member) #define immr_map_size(member, size) (&mpc8xx_immr->member) #define immr_unmap(addr) do {} while (0) diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c index 7e83eb6746f4a..ae72c574eb7e0 100644 --- a/arch/powerpc/platforms/8xx/adder875.c +++ b/arch/powerpc/platforms/8xx/adder875.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include "mpc8xx.h" 
diff --git a/arch/powerpc/platforms/8xx/mpc86xads_setup.c b/arch/powerpc/platforms/8xx/mpc86xads_setup.c index 11b3d1116db14..e4192c0a3c0c7 100644 --- a/arch/powerpc/platforms/8xx/mpc86xads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc86xads_setup.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include "mpc86xads.h" diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index 2fc7cacbcd968..eb4e54ba417f4 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include "mpc885ads.h" From fecc436a97afed920486be609c3989e05547a384 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2023 08:04:40 +0200 Subject: [PATCH 036/135] powerpc/include: Remove mpc8260.h and m82xx_pci.h SIU_INT_IRQ1 is not used anywhere and __IO_BASE is defined in asm/io.h Remove m82xx_pci.h Then the only thing remaining in mpc8260.h is MPC82XX_BCR_PLDP Move MPC82XX_BCR_PLDP into asm/cpm2.h then remove mpc8260.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/afe23bf3624c389ff17e9789884c78c124b7b202.1691474658.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cpm2.h | 3 +++ arch/powerpc/include/asm/fs_pd.h | 4 ---- arch/powerpc/include/asm/mpc8260.h | 22 ------------------- arch/powerpc/platforms/82xx/ep8248e.c | 1 - arch/powerpc/platforms/82xx/km82xx.c | 1 - arch/powerpc/platforms/82xx/m82xx_pci.h | 14 ------------ arch/powerpc/sysdev/cpm2.c | 1 - arch/powerpc/sysdev/cpm2_pic.c | 1 - .../net/ethernet/freescale/fs_enet/mac-fcc.c | 1 - 9 files changed, 3 insertions(+), 45 deletions(-) delete mode 100644 arch/powerpc/include/asm/mpc8260.h delete mode 100644 arch/powerpc/platforms/82xx/m82xx_pci.h diff --git a/arch/powerpc/include/asm/cpm2.h b/arch/powerpc/include/asm/cpm2.h index 9ee192a6c5d76..249d43cc64275 100644 --- a/arch/powerpc/include/asm/cpm2.h 
+++ b/arch/powerpc/include/asm/cpm2.h @@ -1080,6 +1080,9 @@ typedef struct im_idma { #define FCC2_MEM_OFFSET FCC_MEM_OFFSET(1) #define FCC3_MEM_OFFSET FCC_MEM_OFFSET(2) +/* Pipeline Maximum Depth */ +#define MPC82XX_BCR_PLDP 0x00800000 + /* Clocks and GRG's */ enum cpm_clk_dir { diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h index 2b2b52b7451dc..d251a55de8b0c 100644 --- a/arch/powerpc/include/asm/fs_pd.h +++ b/arch/powerpc/include/asm/fs_pd.h @@ -17,10 +17,6 @@ #ifdef CONFIG_CPM2 #include -#if defined(CONFIG_8260) -#include -#endif - #define cpm2_map(member) (&cpm2_immr->member) #define cpm2_map_size(member, size) (&cpm2_immr->member) #define cpm2_unmap(addr) do {} while(0) diff --git a/arch/powerpc/include/asm/mpc8260.h b/arch/powerpc/include/asm/mpc8260.h deleted file mode 100644 index 155114bbd1a26..0000000000000 --- a/arch/powerpc/include/asm/mpc8260.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Since there are many different boards and no standard configuration, - * we have a unique include file for each. Rather than change every - * file that has to include MPC8260 configuration, they all include - * this one and the configuration switching is done here. 
- */ -#ifdef __KERNEL__ -#ifndef __ASM_POWERPC_MPC8260_H__ -#define __ASM_POWERPC_MPC8260_H__ - -#define MPC82XX_BCR_PLDP 0x00800000 /* Pipeline Maximum Depth */ - -#ifdef CONFIG_8260 - -#ifdef CONFIG_PCI_8260 -#include -#endif - -#endif /* CONFIG_8260 */ -#endif /* !__ASM_POWERPC_MPC8260_H__ */ -#endif /* __KERNEL__ */ diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 4bfa1a95e1555..3dc65ce1f175d 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index 51c9bfd975925..c86da3f2b74bd 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/platforms/82xx/m82xx_pci.h b/arch/powerpc/platforms/82xx/m82xx_pci.h deleted file mode 100644 index d07c4d7606f61..0000000000000 --- a/arch/powerpc/platforms/82xx/m82xx_pci.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -#ifndef _PPC_KERNEL_M82XX_PCI_H -#define _PPC_KERNEL_M82XX_PCI_H - -/* - */ - -#define SIU_INT_IRQ1 ((uint)0x13 + CPM_IRQ_OFFSET) - -#ifndef _IO_BASE -#define _IO_BASE isa_io_base -#endif - -#endif /* _PPC_KERNEL_M8260_PCI_H */ diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index 915f4d3991c38..a926911933141 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -37,7 +37,6 @@ #include #include -#include #include #include #include diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index cb9ba4ef557a3..d6c1359ae89da 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -33,7 +33,6 @@ #include #include -#include #include #include diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c 
b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index b47490be872cf..c9491b6e8708c 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -38,7 +38,6 @@ #include #include -#include #include #include From cb888cdf741c958cae3e00b649fb7ed5c9bb2d49 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2023 08:04:41 +0200 Subject: [PATCH 037/135] powerpc: Remove CONFIG_PCI_8260 CONFIG_PCI_8260 is not used anymore, remove it. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/19a4c07466ce8b80f287a06eadcc80c4ab1d2c9e.1691474658.git.christophe.leroy@csgroup.eu --- arch/powerpc/Kconfig | 6 ------ arch/powerpc/platforms/82xx/Kconfig | 1 + 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index aef38d2ca542f..b7a4cb5d9409a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1146,12 +1146,6 @@ config FSL_GTM help Freescale General-purpose Timers support -config PCI_8260 - bool - depends on PCI && 8260 - select PPC_INDIRECT_PCI - default y - config FSL_RIO bool "Freescale Embedded SRIO Controller support" depends on RAPIDIO = y && HAVE_RAPIDIO diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig index 4eb372bdab70d..58e58b4f6a12e 100644 --- a/arch/powerpc/platforms/82xx/Kconfig +++ b/arch/powerpc/platforms/82xx/Kconfig @@ -32,6 +32,7 @@ config 8260 bool depends on PPC_BOOK3S_32 select CPM2 + select PPC_INDIRECT_PCI if PCI help The MPC8260 is a typical embedded CPU made by Freescale. 
Selecting this option means that you wish to build a kernel for a machine with From fbbf4280dae4c02d2f176a8fdac7a7d32fe76fc0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2023 08:04:42 +0200 Subject: [PATCH 038/135] powerpc/8xx: Remove immr_map() and immr_unmap() Since commit fb533d0c5a97 ("[POWERPC] 8xx: Infrastructure code cleanup.") immr_map() just returns the mpc8xx_immr pointer and immr_unmap() does nothing. We already have parts of code that use mpc8xx_immr directly so get rid of immr_map() and immr_unmap() by using mpc8xx_immr directly. And avoid going through local pointers that hide the pointed structure. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/633ed46f6015ff44d5599258647ea517f75d6a1d.1691474658.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/fs_pd.h | 8 --- arch/powerpc/platforms/8xx/cpm1.c | 10 ++-- arch/powerpc/platforms/8xx/m8xx_setup.c | 66 +++++++------------------ 3 files changed, 22 insertions(+), 62 deletions(-) diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h index d251a55de8b0c..82f0e528e21ca 100644 --- a/arch/powerpc/include/asm/fs_pd.h +++ b/arch/powerpc/include/asm/fs_pd.h @@ -22,14 +22,6 @@ #define cpm2_unmap(addr) do {} while(0) #endif -#ifdef CONFIG_PPC_8xx -#include - -#define immr_map(member) (&mpc8xx_immr->member) -#define immr_map_size(member, size) (&mpc8xx_immr->member) -#define immr_unmap(addr) do {} while (0) -#endif - static inline int uart_baudrate(void) { return get_baudrate(); diff --git a/arch/powerpc/platforms/8xx/cpm1.c b/arch/powerpc/platforms/8xx/cpm1.c index 34ab29966c8bf..ebb5f6a27dbf3 100644 --- a/arch/powerpc/platforms/8xx/cpm1.c +++ b/arch/powerpc/platforms/8xx/cpm1.c @@ -41,7 +41,7 @@ #include #include -#include +#include #ifdef CONFIG_8xx_GPIO #include @@ -54,8 +54,6 @@ immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE; void __init cpm_reset(void) { - sysconf8xx_t __iomem *siu_conf; - cpmp = 
&mpc8xx_immr->im_cpm; #ifndef CONFIG_PPC_EARLY_DEBUG_CPM @@ -77,12 +75,10 @@ void __init cpm_reset(void) * manual recommends it. * Bit 25, FAM can also be set to use FEC aggressive mode (860T). */ - siu_conf = immr_map(im_siu_conf); if ((mfspr(SPRN_IMMR) & 0xffff) == 0x0900) /* MPC885 */ - out_be32(&siu_conf->sc_sdcr, 0x40); + out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 0x40); else - out_be32(&siu_conf->sc_sdcr, 1); - immr_unmap(siu_conf); + out_be32(&mpc8xx_immr->im_siu_conf.sc_sdcr, 1); } static DEFINE_SPINLOCK(cmd_lock); diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 24f358f86d16c..3c5c4e08b6a99 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -22,7 +22,6 @@ #include #include -#include #include #include "pic.h" @@ -41,14 +40,11 @@ static irqreturn_t timebase_interrupt(int irq, void *dev) void __init __attribute__ ((weak)) init_internal_rtc(void) { - sit8xx_t __iomem *sys_tmr = immr_map(im_sit); - /* Disable the RTC one second and alarm interrupts. */ - clrbits16(&sys_tmr->sit_rtcsc, (RTCSC_SIE | RTCSC_ALE)); + clrbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_SIE | RTCSC_ALE)); /* Enable the RTC */ - setbits16(&sys_tmr->sit_rtcsc, (RTCSC_RTF | RTCSC_RTE)); - immr_unmap(sys_tmr); + setbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_RTF | RTCSC_RTE)); } static int __init get_freq(char *name, unsigned long *val) @@ -80,23 +76,14 @@ static int __init get_freq(char *name, unsigned long *val) void __init mpc8xx_calibrate_decr(void) { struct device_node *cpu; - cark8xx_t __iomem *clk_r1; - car8xx_t __iomem *clk_r2; - sitk8xx_t __iomem *sys_tmr1; - sit8xx_t __iomem *sys_tmr2; int irq, virq; - clk_r1 = immr_map(im_clkrstk); - /* Unlock the SCCR. 
*/ - out_be32(&clk_r1->cark_sccrk, ~KAPWR_KEY); - out_be32(&clk_r1->cark_sccrk, KAPWR_KEY); - immr_unmap(clk_r1); + out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_clkrstk.cark_sccrk, KAPWR_KEY); /* Force all 8xx processors to use divide by 16 processor clock. */ - clk_r2 = immr_map(im_clkrst); - setbits32(&clk_r2->car_sccr, 0x02000000); - immr_unmap(clk_r2); + setbits32(&mpc8xx_immr->im_clkrst.car_sccr, 0x02000000); /* Processor frequency is MHz. */ @@ -123,14 +110,12 @@ void __init mpc8xx_calibrate_decr(void) * we guarantee the registers are locked, then we unlock them * for our use. */ - sys_tmr1 = immr_map(im_sitk); - out_be32(&sys_tmr1->sitk_tbscrk, ~KAPWR_KEY); - out_be32(&sys_tmr1->sitk_rtcsck, ~KAPWR_KEY); - out_be32(&sys_tmr1->sitk_tbk, ~KAPWR_KEY); - out_be32(&sys_tmr1->sitk_tbscrk, KAPWR_KEY); - out_be32(&sys_tmr1->sitk_rtcsck, KAPWR_KEY); - out_be32(&sys_tmr1->sitk_tbk, KAPWR_KEY); - immr_unmap(sys_tmr1); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbscrk, KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, KAPWR_KEY); init_internal_rtc(); @@ -144,10 +129,8 @@ void __init mpc8xx_calibrate_decr(void) of_node_put(cpu); irq = virq_to_hw(virq); - sys_tmr2 = immr_map(im_sit); - out_be16(&sys_tmr2->sit_tbscr, ((1 << (7 - (irq/2))) << 8) | - (TBSCR_TBF | TBSCR_TBE)); - immr_unmap(sys_tmr2); + out_be16(&mpc8xx_immr->im_sit.sit_tbscr, + ((1 << (7 - (irq / 2))) << 8) | (TBSCR_TBF | TBSCR_TBE)); if (request_irq(virq, timebase_interrupt, IRQF_NO_THREAD, "tbint", NULL)) @@ -161,47 +144,36 @@ void __init mpc8xx_calibrate_decr(void) int mpc8xx_set_rtc_time(struct rtc_time *tm) { - sitk8xx_t __iomem *sys_tmr1; - sit8xx_t __iomem *sys_tmr2; time64_t time; - sys_tmr1 = immr_map(im_sitk); - sys_tmr2 = 
immr_map(im_sit); time = rtc_tm_to_time64(tm); - out_be32(&sys_tmr1->sitk_rtck, KAPWR_KEY); - out_be32(&sys_tmr2->sit_rtc, (u32)time); - out_be32(&sys_tmr1->sitk_rtck, ~KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, KAPWR_KEY); + out_be32(&mpc8xx_immr->im_sit.sit_rtc, (u32)time); + out_be32(&mpc8xx_immr->im_sitk.sitk_rtck, ~KAPWR_KEY); - immr_unmap(sys_tmr2); - immr_unmap(sys_tmr1); return 0; } void mpc8xx_get_rtc_time(struct rtc_time *tm) { unsigned long data; - sit8xx_t __iomem *sys_tmr = immr_map(im_sit); /* Get time from the RTC. */ - data = in_be32(&sys_tmr->sit_rtc); + data = in_be32(&mpc8xx_immr->im_sit.sit_rtc); rtc_time64_to_tm(data, tm); - immr_unmap(sys_tmr); return; } void __noreturn mpc8xx_restart(char *cmd) { - car8xx_t __iomem *clk_r = immr_map(im_clkrst); - - local_irq_disable(); - setbits32(&clk_r->car_plprcr, 0x00000080); + setbits32(&mpc8xx_immr->im_clkrst.car_plprcr, 0x00000080); /* Clear the ME bit in MSR to cause checkstop on machine check */ mtmsr(mfmsr() & ~0x1000); - in_8(&clk_r->res[0]); + in_8(&mpc8xx_immr->im_clkrst.res[0]); panic("Restart failed\n"); } From 7768716d2f1906c9258ed4b39584da6317020594 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 8 Aug 2023 08:04:43 +0200 Subject: [PATCH 039/135] powerpc/cpm2: Remove cpm2_map() and cpm2_unmap() Since commit 449012daa92a ("[POWERPC] cpm2: Infrastructure code cleanup.") cpm2_map() is just returning cpm2_immr pointer and cpm2_unmap() does nothing. We already have parts of code that use cpm2_immr directly so get rid of cpm2_map() and cpm2_unmap() by using cpm2_immr directly. And avoid going through local pointers that hide the pointed structure. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/9fe6ff7284e9f968b12abe7de7c08d7ea40e29d6.1691474658.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/fs_pd.h | 8 -------- arch/powerpc/sysdev/cpm2.c | 32 +++++++++++--------------------- arch/powerpc/sysdev/cpm2_pic.c | 3 +-- 3 files changed, 12 insertions(+), 31 deletions(-) diff --git a/arch/powerpc/include/asm/fs_pd.h b/arch/powerpc/include/asm/fs_pd.h index 82f0e528e21ca..d530f68b4eef6 100644 --- a/arch/powerpc/include/asm/fs_pd.h +++ b/arch/powerpc/include/asm/fs_pd.h @@ -14,14 +14,6 @@ #include #include -#ifdef CONFIG_CPM2 -#include - -#define cpm2_map(member) (&cpm2_immr->member) -#define cpm2_map_size(member, size) (&cpm2_immr->member) -#define cpm2_unmap(addr) do {} while(0) -#endif - static inline int uart_baudrate(void) { return get_baudrate(); diff --git a/arch/powerpc/sysdev/cpm2.c b/arch/powerpc/sysdev/cpm2.c index a926911933141..14cc5ea936c0a 100644 --- a/arch/powerpc/sysdev/cpm2.c +++ b/arch/powerpc/sysdev/cpm2.c @@ -40,7 +40,6 @@ #include #include #include -#include #include @@ -118,9 +117,9 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src) /* This is good enough to get SMCs running..... 
*/ if (brg < 4) { - bp = cpm2_map_size(im_brgc1, 16); + bp = &cpm2_immr->im_brgc1; } else { - bp = cpm2_map_size(im_brgc5, 16); + bp = &cpm2_immr->im_brgc5; brg -= 4; } bp += brg; @@ -130,7 +129,6 @@ void __cpm2_setbrg(uint brg, uint rate, uint clk, int div16, int src) val |= CPM_BRG_DIV16; out_be32(bp, val); - cpm2_unmap(bp); } EXPORT_SYMBOL(__cpm2_setbrg); @@ -139,7 +137,6 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) int ret = 0; int shift; int i, bits = 0; - cpmux_t __iomem *im_cpmux; u32 __iomem *reg; u32 mask = 7; @@ -202,35 +199,33 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) {CPM_CLK_SCC4, CPM_CLK8, 7}, }; - im_cpmux = cpm2_map(im_cpmux); - switch (target) { case CPM_CLK_SCC1: - reg = &im_cpmux->cmx_scr; + reg = &cpm2_immr->im_cpmux.cmx_scr; shift = 24; break; case CPM_CLK_SCC2: - reg = &im_cpmux->cmx_scr; + reg = &cpm2_immr->im_cpmux.cmx_scr; shift = 16; break; case CPM_CLK_SCC3: - reg = &im_cpmux->cmx_scr; + reg = &cpm2_immr->im_cpmux.cmx_scr; shift = 8; break; case CPM_CLK_SCC4: - reg = &im_cpmux->cmx_scr; + reg = &cpm2_immr->im_cpmux.cmx_scr; shift = 0; break; case CPM_CLK_FCC1: - reg = &im_cpmux->cmx_fcr; + reg = &cpm2_immr->im_cpmux.cmx_fcr; shift = 24; break; case CPM_CLK_FCC2: - reg = &im_cpmux->cmx_fcr; + reg = &cpm2_immr->im_cpmux.cmx_fcr; shift = 16; break; case CPM_CLK_FCC3: - reg = &im_cpmux->cmx_fcr; + reg = &cpm2_immr->im_cpmux.cmx_fcr; shift = 8; break; default: @@ -260,7 +255,6 @@ int __init cpm2_clk_setup(enum cpm_clk_target target, int clock, int mode) out_be32(reg, (in_be32(reg) & ~mask) | bits); - cpm2_unmap(im_cpmux); return ret; } @@ -269,7 +263,6 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) int ret = 0; int shift; int i, bits = 0; - cpmux_t __iomem *im_cpmux; u8 __iomem *reg; u8 mask = 3; @@ -284,16 +277,14 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) {CPM_CLK_SMC2, CPM_CLK15, 3}, }; - im_cpmux = cpm2_map(im_cpmux); 
- switch (target) { case CPM_CLK_SMC1: - reg = &im_cpmux->cmx_smr; + reg = &cpm2_immr->im_cpmux.cmx_smr; mask = 3; shift = 4; break; case CPM_CLK_SMC2: - reg = &im_cpmux->cmx_smr; + reg = &cpm2_immr->im_cpmux.cmx_smr; mask = 3; shift = 0; break; @@ -316,7 +307,6 @@ int __init cpm2_smc_clk_setup(enum cpm_clk_target target, int clock) out_8(reg, (in_8(reg) & ~mask) | bits); - cpm2_unmap(im_cpmux); return ret; } diff --git a/arch/powerpc/sysdev/cpm2_pic.c b/arch/powerpc/sysdev/cpm2_pic.c index d6c1359ae89da..e14493685fe87 100644 --- a/arch/powerpc/sysdev/cpm2_pic.c +++ b/arch/powerpc/sysdev/cpm2_pic.c @@ -34,7 +34,6 @@ #include #include -#include #include "cpm2_pic.h" @@ -230,7 +229,7 @@ void cpm2_pic_init(struct device_node *node) { int i; - cpm2_intctl = cpm2_map(im_intctl); + cpm2_intctl = &cpm2_immr->im_intctl; /* Clear the CPM IRQ controller, in case it has any bits set * from the bootloader From 0d5769f9503d9a88661b82fee6a320e711f8b01a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 21 Jun 2023 12:38:10 +0200 Subject: [PATCH 040/135] powerpc/step: Mark __copy_mem_out() and __emulate_dcbz() __always_inline objtool reports the following two warnings: arch/powerpc/lib/sstep.o: warning: objtool: copy_mem_out+0x3c (.text+0x30c): call to __copy_mem_out() with UACCESS enabled arch/powerpc/lib/sstep.o: warning: objtool: emulate_dcbz+0x70 (.text+0x4dc): call to __emulate_dcbz() with UACCESS enabled Mark __copy_mem_out() and __emulate_dcbz() __always_inline Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/f1d4a15da70190f8c2fcddb377bbc1e09827242c.1687343857.git.christophe.leroy@csgroup.eu --- arch/powerpc/lib/sstep.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 38158b77a8019..a4ab8625061a6 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -485,7 +485,7 @@ write_mem_aligned(unsigned long val, unsigned long ea, int nb, struct
pt_regs *r * Copy from a buffer to userspace, using the largest possible * aligned accesses, up to sizeof(long). */ -static nokprobe_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) +static __always_inline int __copy_mem_out(u8 *dest, unsigned long ea, int nb, struct pt_regs *regs) { int c; @@ -1043,7 +1043,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op, } #endif /* CONFIG_VSX */ -static int __emulate_dcbz(unsigned long ea) +static __always_inline int __emulate_dcbz(unsigned long ea) { unsigned long i; unsigned long size = l1_dcache_bytes(); From 6039fcd3fba99451ea9d013c4d3a65a40b2feff0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 21 Jun 2023 12:40:50 +0200 Subject: [PATCH 041/135] powerpc/reg: Remove #ifdef around mtspr macro That ifdef was introduced by commit 1458dd951f7c ("powerpc/8xx: Handle CPU6 ERRATA directly in mtspr() macro") and left over by commit 2a45addd21de ("powerpc/8xx: Remove CPU6 ERRATA Workaround") Remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/cf652e47ea9e453e89813611b6f76d0939a12063.1687344017.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/reg.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index bb0121222ee3c..4ae4ab9090a2d 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1414,11 +1414,9 @@ static inline void mtmsr_isync(unsigned long val) #define mfspr(rn) ({unsigned long rval; \ asm volatile("mfspr %0," __stringify(rn) \ : "=r" (rval)); rval;}) -#ifndef mtspr #define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : \ : "r" ((unsigned long)(v)) \ : "memory") -#endif #define wrtspr(rn) asm volatile("mtspr " __stringify(rn) ",2" : : : "memory") static inline void wrtee(unsigned long val) From 0e1cd3d9f82eb5440d32d4c0f12c65403b956cb5 Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 4 Aug 2023 15:56:30 +0800 Subject: [PATCH 042/135] cxl: Use pci_find_vsec_capability() to simplify the code The PCI core added pci_find_vsec_capability() to query a VSEC. We can use that core API to simplify the code. The only logical change is that pci_find_vsec_capability() checks the Vendor ID before finding the VSEC. PCI spec rev 5.0 says in 7.9.5.2 Vendor-Specific Header: VSEC ID - This field is a vendor-defined ID number that indicates the nature and format of the VSEC structure Software must qualify the Vendor ID before interpreting this field.
Signed-off-by: Xiongfeng Wang Reviewed-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://msgid.link/20230804075630.186054-1-wangxiongfeng2@huawei.com --- drivers/misc/cxl/pci.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 0ff944860dda9..4cf9e7c42a244 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -150,16 +150,7 @@ static inline resource_size_t p2_size(struct pci_dev *dev) static int find_cxl_vsec(struct pci_dev *dev) { - int vsec = 0; - u16 val; - - while ((vsec = pci_find_next_ext_capability(dev, vsec, PCI_EXT_CAP_ID_VNDR))) { - pci_read_config_word(dev, vsec + 0x4, &val); - if (val == CXL_PCI_VSEC_ID) - return vsec; - } - return 0; - + return pci_find_vsec_capability(dev, PCI_VENDOR_ID_IBM, CXL_PCI_VSEC_ID); } static void dump_cxl_config_space(struct pci_dev *dev) From ae7936d232d862e5b8311180036281ffe93735b8 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 3 Aug 2023 11:33:52 -0700 Subject: [PATCH 043/135] powerpc/inst: add PPC_TLBILX_LPID Clang didn't recognize the instruction tlbilxlpid. This was fixed in clang-18 [0] then backported to clang-17 [1]. To support clang-16 and older, rather than using that instruction bare in inline asm, add it to ppc-opcode.h and use that macro as is done elsewhere for other instructions. 
Link: https://github.com/ClangBuiltLinux/linux/issues/1891 Link: https://github.com/llvm/llvm-project/issues/64080 Link: https://github.com/llvm/llvm-project/commit/53648ac1d0c953ae6d008864dd2eddb437a92468 [0] Link: https://github.com/llvm/llvm-project-release-prs/commit/0af7e5e54a8c7ac665773ac1ada328713e8338f5 [1] Reported-by: kernel test robot Closes: https://lore.kernel.org/llvm/202307211945.TSPcyOhh-lkp@intel.com/ Suggested-by: Michael Ellerman Signed-off-by: Nick Desaulniers Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20230803-ppc_tlbilxlpid-v3-1-ca84739bfd73@google.com --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/kvm/e500mc.c | 7 ++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index ef6972aa33b92..005601243dda4 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -397,6 +397,7 @@ #define PPC_RAW_RFCI (0x4c000066) #define PPC_RAW_RFDI (0x4c00004e) #define PPC_RAW_RFMCI (0x4c00004c) +#define PPC_RAW_TLBILX_LPID (0x7c000024) #define PPC_RAW_TLBILX(t, a, b) (0x7c000024 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b)) #define PPC_RAW_WAIT_v203 (0x7c00007c) #define PPC_RAW_WAIT(w, p) (0x7c00003c | __PPC_WC(w) | __PPC_PL(p)) @@ -616,6 +617,7 @@ #define PPC_TLBILX(t, a, b) stringify_in_c(.long PPC_RAW_TLBILX(t, a, b)) #define PPC_TLBILX_ALL(a, b) PPC_TLBILX(0, a, b) #define PPC_TLBILX_PID(a, b) PPC_TLBILX(1, a, b) +#define PPC_TLBILX_LPID stringify_in_c(.long PPC_RAW_TLBILX_LPID) #define PPC_TLBILX_VA(a, b) PPC_TLBILX(3, a, b) #define PPC_WAIT_v203 stringify_in_c(.long PPC_RAW_WAIT_v203) #define PPC_WAIT(w, p) stringify_in_c(.long PPC_RAW_WAIT(w, p)) diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index d58df71ace584..e476e107a932b 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -20,6 +20,7 @@ #include #include #include 
+#include #include "booke.h" #include "e500.h" @@ -92,7 +93,11 @@ void kvmppc_e500_tlbil_all(struct kvmppc_vcpu_e500 *vcpu_e500) local_irq_save(flags); mtspr(SPRN_MAS5, MAS5_SGS | get_lpid(&vcpu_e500->vcpu)); - asm volatile("tlbilxlpid"); + /* + * clang-17 and older could not assemble tlbilxlpid. + * https://github.com/ClangBuiltLinux/linux/issues/1891 + */ + asm volatile (PPC_TLBILX_LPID); mtspr(SPRN_MAS5, 0); local_irq_restore(flags); } From 3eb3f168e83aa7a7b8477507cf4b08b9515b4b13 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 7 Aug 2023 00:09:52 +0900 Subject: [PATCH 044/135] powerpc: remove unneeded #include <asm/export.h> There is no EXPORT_SYMBOL line there, hence #include <asm/export.h> is unneeded. Signed-off-by: Masahiro Yamada Signed-off-by: Michael Ellerman Link: https://msgid.link/20230806150954.394189-1-masahiroy@kernel.org --- arch/powerpc/kernel/entry_32.S | 1 - arch/powerpc/kernel/head_40x.S | 1 - arch/powerpc/kernel/head_44x.S | 1 - arch/powerpc/kernel/head_64.S | 1 - arch/powerpc/kernel/head_85xx.S | 1 - arch/powerpc/kernel/head_8xx.S | 1 - arch/powerpc/kernel/head_book3s_32.S | 1 - arch/powerpc/kernel/trace/ftrace_64_pg.S | 1 - arch/powerpc/kernel/trace/ftrace_mprofile.S | 1 - 9 files changed, 9 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index fe27d41f9a3de..9692acb0361fb 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index 3f68a16246462..b32e7b2ebdcfd 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -38,7 +38,6 @@ #include #include #include -#include #include "head_32.h" diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S index 63a85c16fef46..a3197c9f721cd 100644 --- a/arch/powerpc/kernel/head_44x.S +++ b/arch/powerpc/kernel/head_44x.S @@ -35,7 +35,6 @@ #include
#include #include -#include #include #include "head_booke.h" diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index f132d8704263c..bc588ff7bb3e9 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -40,7 +40,6 @@ #include #include #include -#include #include #ifdef CONFIG_PPC_BOOK3S #include diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S index fdbee1093e2ba..97e9ea0c72979 100644 --- a/arch/powerpc/kernel/head_85xx.S +++ b/arch/powerpc/kernel/head_85xx.S @@ -40,7 +40,6 @@ #include #include #include -#include #include #include "head_booke.h" diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index a79751e05781d..647b0b445e89d 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -29,7 +29,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index c51f28b5abc05..6764b98ca360f 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -31,7 +31,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S index 6708e24db0aba..cdbcb5a0783b3 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.S +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.S @@ -8,7 +8,6 @@ #include #include #include -#include _GLOBAL_TOC(ftrace_caller) lbz r3, PACA_FTRACE_ENABLED(r13) diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index ffb1db3868499..15fc75ffff32e 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include From 393261828740c3ed95fc810c3f4c1018b86af7e5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 7 Aug 2023 00:09:53 +0900 Subject: 
[PATCH 045/135] powerpc: replace #include <asm/export.h> with #include <linux/export.h> Commit ddb5cdbafaaa ("kbuild: generate KSYMTAB entries by modpost") deprecated <asm/export.h>, which is now a wrapper of <linux/export.h>. Replace #include <asm/export.h> with #include <linux/export.h>. After all the <asm/export.h> lines are converted, <asm/export.h> and <asm-generic/export.h> will be removed. Signed-off-by: Masahiro Yamada [mpe: Fixup selftests that stub asm/export.h] Signed-off-by: Michael Ellerman Link: https://msgid.link/20230806150954.394189-2-masahiroy@kernel.org --- arch/powerpc/kernel/epapr_hcalls.S | 2 +- arch/powerpc/kernel/fpu.S | 2 +- arch/powerpc/kernel/misc.S | 2 +- arch/powerpc/kernel/misc_32.S | 2 +- arch/powerpc/kernel/misc_64.S | 2 +- arch/powerpc/kernel/tm.S | 2 +- arch/powerpc/kernel/trace/ftrace_low.S | 2 +- arch/powerpc/kernel/ucall.S | 2 +- arch/powerpc/kernel/vector.S | 2 +- arch/powerpc/kvm/book3s_64_entry.S | 2 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- arch/powerpc/kvm/tm.S | 2 +- arch/powerpc/lib/checksum_32.S | 2 +- arch/powerpc/lib/checksum_64.S | 2 +- arch/powerpc/lib/copy_32.S | 2 +- arch/powerpc/lib/copy_mc_64.S | 2 +- arch/powerpc/lib/copypage_64.S | 2 +- arch/powerpc/lib/copyuser_64.S | 2 +- arch/powerpc/lib/hweight_64.S | 2 +- arch/powerpc/lib/mem_64.S | 2 +- arch/powerpc/lib/memcmp_32.S | 2 +- arch/powerpc/lib/memcmp_64.S | 2 +- arch/powerpc/lib/memcpy_64.S | 2 +- arch/powerpc/lib/string.S | 2 +- arch/powerpc/lib/string_32.S | 2 +- arch/powerpc/lib/string_64.S | 2 +- arch/powerpc/lib/strlen_32.S | 2 +- arch/powerpc/mm/book3s32/hash_low.S | 2 +- arch/powerpc/sysdev/dcr-low.S | 2 +- .../testing/selftests/powerpc/copyloops/{asm => linux}/export.h | 0 .../selftests/powerpc/stringloops/{asm => linux}/export.h | 0 31 files changed, 29 insertions(+), 29 deletions(-) rename tools/testing/selftests/powerpc/copyloops/{asm => linux}/export.h (100%) rename tools/testing/selftests/powerpc/stringloops/{asm => linux}/export.h (100%) diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S index 033116e465d08..1a9b5ae8ccb2f 100644 ---
a/arch/powerpc/kernel/epapr_hcalls.S +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -3,6 +3,7 @@ * Copyright (C) 2012 Freescale Semiconductor, Inc. */ +#include #include #include #include @@ -12,7 +13,6 @@ #include #include #include -#include #ifndef CONFIG_PPC64 /* epapr_ev_idle() was derived from e500_idle() */ diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index f71f2bbd4de64..6a9acfb690c9f 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -9,6 +9,7 @@ * Copyright (C) 1997 Dan Malek (dmalek@jlc.net). */ +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S index fb7de3543c037..29e1440d14cc1 100644 --- a/arch/powerpc/kernel/misc.S +++ b/arch/powerpc/kernel/misc.S @@ -10,11 +10,11 @@ * * setjmp/longjmp code by Paul Mackerras. */ +#include #include #include #include #include -#include .text diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index daf8f87d23728..2eabb15687a64 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -8,6 +8,7 @@ * */ +#include #include #include #include @@ -22,7 +23,6 @@ #include #include #include -#include #include .text diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 2c9ac70aaf0c9..1a8cdafd68e85 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -9,6 +9,7 @@ * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com) */ +#include #include #include #include @@ -23,7 +24,6 @@ #include #include #include -#include #include .text diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 9feab5e0485bf..a9cd6507163ab 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -6,13 +6,13 @@ * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. 
*/ +#include #include #include #include #include #include #include -#include #include #ifdef CONFIG_VSX diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_low.S index 294d1e05958aa..5e271f87f7990 100644 --- a/arch/powerpc/kernel/trace/ftrace_low.S +++ b/arch/powerpc/kernel/trace/ftrace_low.S @@ -3,12 +3,12 @@ * Split from entry_64.S */ +#include #include #include #include #include #include -#include #ifdef CONFIG_PPC64 .pushsection ".tramp.ftrace.text","aw",@progbits; diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S index 07296bc391664..80a1f9a4300ae 100644 --- a/arch/powerpc/kernel/ucall.S +++ b/arch/powerpc/kernel/ucall.S @@ -5,8 +5,8 @@ * Copyright 2019, IBM Corporation. * */ +#include #include -#include _GLOBAL(ucall_norets) EXPORT_SYMBOL_GPL(ucall_norets) diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index fcc0ad6d9c7b0..4094e4c4c77a7 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -1,4 +1,5 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include #include #include #include @@ -8,7 +9,6 @@ #include #include #include -#include #include /* diff --git a/arch/powerpc/kvm/book3s_64_entry.S b/arch/powerpc/kvm/book3s_64_entry.S index 6c2b1d17cb636..3b361af873135 100644 --- a/arch/powerpc/kvm/book3s_64_entry.S +++ b/arch/powerpc/kvm/book3s_64_entry.S @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-only */ +#include #include #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 870110e3d9b1e..ea7ad200b330f 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -10,6 +10,7 @@ * Authors: Alexander Graf */ +#include #include #include #include @@ -24,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S index 
2158f61e317fc..b506c4d9a8d90 100644 --- a/arch/powerpc/kvm/tm.S +++ b/arch/powerpc/kvm/tm.S @@ -6,10 +6,10 @@ * Copyright 2011 Paul Mackerras, IBM Corp. */ +#include #include #include #include -#include #include #include diff --git a/arch/powerpc/lib/checksum_32.S b/arch/powerpc/lib/checksum_32.S index 4541e8e294679..cd00b9bdd7727 100644 --- a/arch/powerpc/lib/checksum_32.S +++ b/arch/powerpc/lib/checksum_32.S @@ -8,12 +8,12 @@ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). */ +#include #include #include #include #include #include -#include .text diff --git a/arch/powerpc/lib/checksum_64.S b/arch/powerpc/lib/checksum_64.S index 98ff51bd2f7dc..d53d8f09a2c2b 100644 --- a/arch/powerpc/lib/checksum_64.S +++ b/arch/powerpc/lib/checksum_64.S @@ -8,11 +8,11 @@ * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). */ +#include #include #include #include #include -#include /* * Computes the checksum of a memory block at buff, length len, diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S index 3e9c27c463310..933b685e7ab68 100644 --- a/arch/powerpc/lib/copy_32.S +++ b/arch/powerpc/lib/copy_32.S @@ -4,11 +4,11 @@ * * Copyright (C) 1996-2005 Paul Mackerras. */ +#include #include #include #include #include -#include #include #include diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S index 88d46c471493b..bf1014b28fe83 100644 --- a/arch/powerpc/lib/copy_mc_64.S +++ b/arch/powerpc/lib/copy_mc_64.S @@ -4,9 +4,9 @@ * Derived from copyuser_power7.s by Anton Blanchard * Author - Balbir Singh */ +#include #include #include -#include .macro err1 100: diff --git a/arch/powerpc/lib/copypage_64.S b/arch/powerpc/lib/copypage_64.S index 5d09a029b5567..f33a2e6088e51 100644 --- a/arch/powerpc/lib/copypage_64.S +++ b/arch/powerpc/lib/copypage_64.S @@ -2,11 +2,11 @@ /* * Copyright (C) 2008 Mark Nelson, IBM Corp. 
*/ +#include #include #include #include #include -#include #include _GLOBAL_TOC(copy_page) diff --git a/arch/powerpc/lib/copyuser_64.S b/arch/powerpc/lib/copyuser_64.S index db8719a14846d..9af969d2cc0cb 100644 --- a/arch/powerpc/lib/copyuser_64.S +++ b/arch/powerpc/lib/copyuser_64.S @@ -2,9 +2,9 @@ /* * Copyright (C) 2002 Paul Mackerras, IBM Corp. */ +#include #include #include -#include #include #include diff --git a/arch/powerpc/lib/hweight_64.S b/arch/powerpc/lib/hweight_64.S index 09af29561314e..151875050da9c 100644 --- a/arch/powerpc/lib/hweight_64.S +++ b/arch/powerpc/lib/hweight_64.S @@ -5,9 +5,9 @@ * * Author: Anton Blanchard */ +#include #include #include -#include #include /* Note: This code relies on -mminimal-toc */ diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S index 9351ffab409cf..6fd06cd20faa0 100644 --- a/arch/powerpc/lib/mem_64.S +++ b/arch/powerpc/lib/mem_64.S @@ -4,10 +4,10 @@ * * Copyright (C) 1996 Paul Mackerras. */ +#include #include #include #include -#include #include #ifndef CONFIG_KASAN diff --git a/arch/powerpc/lib/memcmp_32.S b/arch/powerpc/lib/memcmp_32.S index 5010e376f7b83..f6fca5664e91c 100644 --- a/arch/powerpc/lib/memcmp_32.S +++ b/arch/powerpc/lib/memcmp_32.S @@ -7,8 +7,8 @@ * */ +#include #include -#include .text diff --git a/arch/powerpc/lib/memcmp_64.S b/arch/powerpc/lib/memcmp_64.S index 0b9b1685a33dc..142c666d38978 100644 --- a/arch/powerpc/lib/memcmp_64.S +++ b/arch/powerpc/lib/memcmp_64.S @@ -3,8 +3,8 @@ * Author: Anton Blanchard * Copyright 2015 IBM Corporation. */ +#include #include -#include #include #define off8 r6 diff --git a/arch/powerpc/lib/memcpy_64.S b/arch/powerpc/lib/memcpy_64.S index 016c91e958d8f..b5a67e20143f8 100644 --- a/arch/powerpc/lib/memcpy_64.S +++ b/arch/powerpc/lib/memcpy_64.S @@ -2,9 +2,9 @@ /* * Copyright (C) 2002 Paul Mackerras, IBM Corp. 
*/ +#include #include #include -#include #include #include #include diff --git a/arch/powerpc/lib/string.S b/arch/powerpc/lib/string.S index 2752b1cc1d45f..daa72061dc0c7 100644 --- a/arch/powerpc/lib/string.S +++ b/arch/powerpc/lib/string.S @@ -4,8 +4,8 @@ * * Copyright (C) 1996 Paul Mackerras. */ +#include #include -#include #include .text diff --git a/arch/powerpc/lib/string_32.S b/arch/powerpc/lib/string_32.S index 1ddb26394e8ac..3ee45619a3f82 100644 --- a/arch/powerpc/lib/string_32.S +++ b/arch/powerpc/lib/string_32.S @@ -7,8 +7,8 @@ * */ +#include #include -#include #include .text diff --git a/arch/powerpc/lib/string_64.S b/arch/powerpc/lib/string_64.S index df41ce06f86bd..a25eb8588434d 100644 --- a/arch/powerpc/lib/string_64.S +++ b/arch/powerpc/lib/string_64.S @@ -6,10 +6,10 @@ * Author: Anton Blanchard */ +#include #include #include #include -#include /** * __arch_clear_user: - Zero a block of memory in user space, with less checking. diff --git a/arch/powerpc/lib/strlen_32.S b/arch/powerpc/lib/strlen_32.S index 0a8d3f64d4935..bbd24feb233f2 100644 --- a/arch/powerpc/lib/strlen_32.S +++ b/arch/powerpc/lib/strlen_32.S @@ -6,8 +6,8 @@ * * Inspired from glibc implementation */ +#include #include -#include #include .text diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S index a5a21d444e729..8b804e1a9fa44 100644 --- a/arch/powerpc/mm/book3s32/hash_low.S +++ b/arch/powerpc/mm/book3s32/hash_low.S @@ -14,6 +14,7 @@ * hash table, so this file is not used on them.) 
*/ +#include #include #include #include @@ -22,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/arch/powerpc/sysdev/dcr-low.S b/arch/powerpc/sysdev/dcr-low.S index 329b9c4ae5429..e8401b205d380 100644 --- a/arch/powerpc/sysdev/dcr-low.S +++ b/arch/powerpc/sysdev/dcr-low.S @@ -5,10 +5,10 @@ * Copyright (c) 2004 Eugene Surovegin */ +#include #include #include #include -#include #define DCR_ACCESS_PROLOG(table) \ cmplwi cr0,r3,1024; \ diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/linux/export.h similarity index 100% rename from tools/testing/selftests/powerpc/copyloops/asm/export.h rename to tools/testing/selftests/powerpc/copyloops/linux/export.h diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/linux/export.h similarity index 100% rename from tools/testing/selftests/powerpc/stringloops/asm/export.h rename to tools/testing/selftests/powerpc/stringloops/linux/export.h From efa1f85019537ce44832cf73a6db18611e3e41cd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 7 Aug 2023 00:09:54 +0900 Subject: [PATCH 046/135] powerpc: remove <asm/export.h> All *.S files under arch/powerpc/ have been converted to include <linux/export.h> instead of <asm/export.h>. Remove <asm/export.h>.
Signed-off-by: Masahiro Yamada Signed-off-by: Michael Ellerman Link: https://msgid.link/20230806150954.394189-3-masahiroy@kernel.org --- arch/powerpc/include/asm/Kbuild | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 419319c4963cc..61a8d5555cd7e 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -3,7 +3,6 @@ generated-y += syscall_table_32.h generated-y += syscall_table_64.h generated-y += syscall_table_spu.h generic-y += agp.h -generic-y += export.h generic-y += kvm_types.h generic-y += mcs_spinlock.h generic-y += qrwlock.h From afda85b963c12947e298ad85d757e333aa40fd74 Mon Sep 17 00:00:00 2001 From: ruanjinjie Date: Thu, 10 Nov 2022 09:19:29 +0800 Subject: [PATCH 047/135] powerpc/pseries: fix possible memory leak in ibmebus_bus_init() If device_register() returns error in ibmebus_bus_init(), name of kobject which is allocated in dev_set_name() called in device_add() is leaked. As comment of device_add() says, it should call put_device() to drop the reference count that was set in device_initialize() when it fails, so the name can be freed in kobject_cleanup(). 
Signed-off-by: ruanjinjie Signed-off-by: Michael Ellerman Link: https://msgid.link/20221110011929.3709774-1-ruanjinjie@huawei.com --- arch/powerpc/platforms/pseries/ibmebus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 183aa8de48e74..998e3aff24572 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -461,6 +461,7 @@ static int __init ibmebus_bus_init(void) if (err) { printk(KERN_WARNING "%s: device_register returned %i\n", __func__, err); + put_device(&ibmebus_bus_device); bus_unregister(&ibmebus_bus_type); return err; From 984b07b428994d9e83c6de9a5f1307948188fe71 Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Mon, 31 Jul 2023 17:25:39 +0530 Subject: [PATCH 048/135] powerpc/xics: Remove unnecessary endian conversion Remove an unnecessary piece of code that does an endianness conversion but does not use the result. The following warning was reported by Clang's static analyzer: arch/powerpc/sysdev/xics/ics-opal.c:114:2: warning: Value stored to 'server' is never read [deadcode.DeadStores] server = be16_to_cpu(oserver); 'server' was used as a parameter to opal_get_xive() in commit 5c7c1e9444d8 ("powerpc/powernv: Add OPAL ICS backend") when it was introduced. 'server' was also used in an error message for the call to opal_get_xive(). 'server' was always later set by a call to ics_opal_mangle_server() before being used. Commit bf8e0f891a32 ("powerpc/powernv: Fix endian issues in OPAL ICS backend") used a new variable 'oserver' as the parameter to opal_get_xive() instead of 'server' for endian correctness. It also removed 'server' from the error message for the call to opal_get_xive(). Fix the warning by removing the server variable assignment. 
Fixes: bf8e0f891a32 ("powerpc/powernv: Fix endian issues in OPAL ICS backend") Reviewed-by: Jordan Niethe Signed-off-by: Gautam Menghani Signed-off-by: Michael Ellerman Link: https://msgid.link/20230731115543.36991-1-gautam@linux.ibm.com --- arch/powerpc/sysdev/xics/ics-opal.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/sysdev/xics/ics-opal.c b/arch/powerpc/sysdev/xics/ics-opal.c index 6cfbb4fac7fb6..5fe73dabab791 100644 --- a/arch/powerpc/sysdev/xics/ics-opal.c +++ b/arch/powerpc/sysdev/xics/ics-opal.c @@ -111,7 +111,6 @@ static int ics_opal_set_affinity(struct irq_data *d, __func__, d->irq, hw_irq, rc); return -1; } - server = be16_to_cpu(oserver); wanted_server = xics_get_irq_server(d->irq, cpumask, 1); if (wanted_server < 0) { From fe8aa8e3379326ecb77203cae50e8e83c054aedc Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Fri, 4 Aug 2023 16:04:35 +0800 Subject: [PATCH 049/135] powerpc/powernv/pci: use pci_dev_id() to simplify the code PCI core API pci_dev_id() can be used to get the BDF number for a pci device. We don't need to compose it manually. Use pci_dev_id() to simplify the code a little bit. 
Signed-off-by: Xiongfeng Wang Reviewed-by: Frederic Barrat Signed-off-by: Michael Ellerman Link: https://msgid.link/20230804080435.191196-1-wangxiongfeng2@huawei.com --- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index cb637827bc585..28fac47700731 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -997,14 +997,14 @@ static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev) struct pnv_ioda_pe *pe; /* Check if the BDFN for this device is associated with a PE yet */ - pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); + pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev)); if (!pe) { /* VF PEs should be pre-configured in pnv_pci_sriov_enable() */ if (WARN_ON(pdev->is_virtfn)) return; pnv_pci_configure_bus(pdev->bus); - pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); + pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev)); pci_info(pdev, "Configured PE#%x\n", pe ? pe->pe_number : 0xfffff); @@ -2526,7 +2526,7 @@ static struct iommu_group *pnv_pci_device_group(struct pci_controller *hose, if (WARN_ON(!phb)) return ERR_PTR(-ENODEV); - pe = pnv_pci_bdfn_to_pe(phb, pdev->devfn | (pdev->bus->number << 8)); + pe = pnv_pci_bdfn_to_pe(phb, pci_dev_id(pdev)); if (!pe) return ERR_PTR(-ENODEV); From 58b6fed89ab0f602de0d143c617c29c3d4c67429 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 9 Aug 2023 10:07:13 +0200 Subject: [PATCH 050/135] powerpc: Make virt_to_pfn() a static inline Making virt_to_pfn() a static inline taking a strongly typed (const void *) makes the contract of passing a pointer of that type to the function explicit and exposes any misuse of the macro virt_to_pfn() acting polymorphic and accepting many types such as (void *), (uintptr_t) or (unsigned long) as arguments without warnings. 
Move the virt_to_pfn() and related functions below the declaration of __pa() so it compiles. For symmetry do the same with pfn_to_kaddr(). As the file is included right into the linker file, we need to surround the functions with ifndef __ASSEMBLY__ so we don't cause compilation errors. The conversion moreover exposes the fact that pmd_page_vaddr() was returning an unsigned long rather than a const void * as could be expected, so all the sites defining pmd_page_vaddr() had to be augmented as well. Finally the KVM code in book3s_64_mmu_hv.c was passing an unsigned int to virt_to_phys() so fix that up with a cast so the result compiles. Signed-off-by: Linus Walleij [mpe: Fixup kfence.h, simplify pfn_to_kaddr() & pmd_page_vaddr()] Signed-off-by: Michael Ellerman Link: https://msgid.link/20230809-virt-to-phys-powerpc-v1-1-12e912a7d439@linaro.org --- arch/powerpc/include/asm/kfence.h | 2 +- arch/powerpc/include/asm/nohash/32/pgtable.h | 2 +- arch/powerpc/include/asm/nohash/64/pgtable.h | 2 +- arch/powerpc/include/asm/page.h | 30 +++++++++++++------- arch/powerpc/include/asm/pgtable.h | 4 +-- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- 6 files changed, 26 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/include/asm/kfence.h b/arch/powerpc/include/asm/kfence.h index 6fd2b4d486c52..424ceef82ae61 100644 --- a/arch/powerpc/include/asm/kfence.h +++ b/arch/powerpc/include/asm/kfence.h @@ -23,7 +23,7 @@ static inline bool arch_kfence_init_pool(void) #ifdef CONFIG_PPC64 static inline bool kfence_protect_page(unsigned long addr, bool protect) { - struct page *page = virt_to_page(addr); + struct page *page = virt_to_page((void *)addr); __kernel_map_pages(page, 1, !protect); diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index fec56d965f00d..d6201b5096b83 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -355,7 +355,7 @@ static inline int pte_young(pte_t 
pte) #define pmd_pfn(pmd) (pmd_val(pmd) >> PAGE_SHIFT) #else #define pmd_page_vaddr(pmd) \ - ((unsigned long)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1))) + ((const void *)(pmd_val(pmd) & ~(PTE_TABLE_SIZE - 1))) #define pmd_pfn(pmd) (__pa(pmd_val(pmd)) >> PAGE_SHIFT) #endif diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h index 287e25864ffae..81c801880933e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h @@ -127,7 +127,7 @@ static inline pte_t pmd_pte(pmd_t pmd) #define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \ || (pmd_val(pmd) & PMD_BAD_BITS)) #define pmd_present(pmd) (!pmd_none(pmd)) -#define pmd_page_vaddr(pmd) (pmd_val(pmd) & ~PMD_MASKED_BITS) +#define pmd_page_vaddr(pmd) ((const void *)(pmd_val(pmd) & ~PMD_MASKED_BITS)) extern struct page *pmd_page(pmd_t pmd); #define pmd_pfn(pmd) (page_to_pfn(pmd_page(pmd))) diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index f2b6bf5687d0e..e5fcc79b5bfba 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -9,6 +9,7 @@ #ifndef __ASSEMBLY__ #include #include +#include #else #include #endif @@ -119,16 +120,6 @@ extern long long virt_phys_offset; #define ARCH_PFN_OFFSET ((unsigned long)(MEMORY_START >> PAGE_SHIFT)) #endif -#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) -#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) -#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) - -#define virt_addr_valid(vaddr) ({ \ - unsigned long _addr = (unsigned long)vaddr; \ - _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ - pfn_valid(virt_to_pfn(_addr)); \ -}) - /* * On Book-E parts we need __va to parse the device tree and we can't * determine MEMORY_START until then. 
However we can determine PHYSICAL_START @@ -233,6 +224,25 @@ extern long long virt_phys_offset; #endif #endif +#ifndef __ASSEMBLY__ +static inline unsigned long virt_to_pfn(const void *kaddr) +{ + return __pa(kaddr) >> PAGE_SHIFT; +} + +static inline const void *pfn_to_kaddr(unsigned long pfn) +{ + return __va(pfn << PAGE_SHIFT); +} +#endif + +#define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) +#define virt_addr_valid(vaddr) ({ \ + unsigned long _addr = (unsigned long)vaddr; \ + _addr >= PAGE_OFFSET && _addr < (unsigned long)high_memory && \ + pfn_valid(virt_to_pfn((void *)_addr)); \ +}) + /* * Unfortunately the PLT is in the BSS in the PPC32 ELF ABI, * and needs to be executable. This means the whole heap ends diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index 6a88bfdaa69b6..a92f1f65d0a7b 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -60,9 +60,9 @@ static inline pgprot_t pte_pgprot(pte_t pte) } #ifndef pmd_page_vaddr -static inline unsigned long pmd_page_vaddr(pmd_t pmd) +static inline const void *pmd_page_vaddr(pmd_t pmd) { - return ((unsigned long)__va(pmd_val(pmd) & ~PMD_MASKED_BITS)); + return __va(pmd_val(pmd) & ~PMD_MASKED_BITS); } #define pmd_page_vaddr pmd_page_vaddr #endif diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 7f765d5ad4366..efd0ebf70a5e6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -182,7 +182,7 @@ void kvmppc_free_hpt(struct kvm_hpt_info *info) vfree(info->rev); info->rev = NULL; if (info->cma) - kvm_free_hpt_cma(virt_to_page(info->virt), + kvm_free_hpt_cma(virt_to_page((void *)info->virt), 1 << (info->order - PAGE_SHIFT)); else if (info->virt) free_pages(info->virt, info->order - PAGE_SHIFT); From 71f1c39647d8c9d4d54a861ec81f1ff17544bcb6 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 29 Jul 2023 13:04:46 +0530 Subject: [PATCH 051/135] powerpc/hv_gpci: 
Add sysfs file inside hv_gpci device to show processor bus topology information The hcall H_GET_PERF_COUNTER_INFO with counter request value as PROCESSOR_BUS_TOPOLOGY(0XD0), can be used to get the system topology information. To expose the system topology information, patch adds sysfs file called "processor_bus_topology" to the "/sys/devices/hv_gpci/interface/" of hv_gpci pmu driver. Add macro for PROCESSOR_BUS_TOPOLOGY counter request value in hv-gpci.c file. Also add a new function called "systeminfo_gpci_request", to make the H_GET_PERF_COUNTER_INFO hcall with added macro and populates the output buffer. The processor_bus_topology sysfs file is only available for power10 and above platforms. Add a new function called "add_sysinfo_interface_files", which will add processor_bus_topology attribute in the interface_attrs array, only for power10 and above platforms. Also add macro INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR in hv-gpci.c file, which points to the index of NULL placeholder, for processor_bus_topology attribute. 
Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230729073455.7918-2-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 181 +++++++++++++++++++++++++++++++++++- 1 file changed, 179 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 7ff8ff3509f5f..7ac9cecf438d3 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -102,6 +102,141 @@ static ssize_t cpumask_show(struct device *dev, return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); } +/* Counter request value to retrieve system information */ +#define PROCESSOR_BUS_TOPOLOGY 0XD0 + +/* Interface attribute array index to store system information */ +#define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 + +static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); + +static unsigned long systeminfo_gpci_request(u32 req, u32 starting_index, + u16 secondary_index, char *buf, + size_t *n, struct hv_gpci_request_buffer *arg) +{ + unsigned long ret; + size_t i, j; + + arg->params.counter_request = cpu_to_be32(req); + arg->params.starting_index = cpu_to_be32(starting_index); + arg->params.secondary_index = cpu_to_be16(secondary_index); + + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', + * which means that the current buffer size cannot accommodate + * all the information and a partial buffer returned. + * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. + * + * ret value as H_AUTHORITY implies that partition is not permitted to retrieve + * performance information, and required to set + * "Enable Performance Information Collection" option. 
+ */ + if (ret == H_AUTHORITY) + return -EPERM; + + /* + * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE + * because of invalid buffer-length/address or due to some hardware + * error. + */ + if (ret && (ret != H_PARAMETER)) + return -EIO; + + /* + * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' + * to show the total number of counter_value array elements + * returned via hcall. + * hcall also populates 'cv_element_size' corresponds to individual + * counter_value array element size. Below loop go through all + * counter_value array elements as per their size and add it to + * the output buffer. + */ + for (i = 0; i < be16_to_cpu(arg->params.returned_values); i++) { + j = i * be16_to_cpu(arg->params.cv_element_size); + + for (; j < (i + 1) * be16_to_cpu(arg->params.cv_element_size); j++) + *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[j]); + *n += sprintf(buf + *n, "\n"); + } + + if (*n >= PAGE_SIZE) { + pr_info("System information exceeds PAGE_SIZE\n"); + return -EFBIG; + } + + return ret; +} + +static ssize_t processor_bus_topology_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * Pass the counter request value 0xD0 corresponds to request + * type 'Processor_bus_topology', to retrieve + * the system topology information. + * starting_index value implies the starting hardware + * chip id. + */ + ret = systeminfo_gpci_request(PROCESSOR_BUS_TOPOLOGY, 0, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which + * implies that buffer can't accommodate all information, and a partial buffer + * returned. To handle that, we need to make subsequent requests + * with next starting index to retrieve additional (missing) data. 
+ * Below loop do subsequent hcalls with next starting index and add it + * to buffer util we get all the information. + */ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* + * Since the starting index value is part of counter_value + * buffer elements, use the starting index value in the last + * element and add 1 to make subsequent hcalls. + */ + u32 starting_index = arg->bytes[last_element + 3] + + (arg->bytes[last_element + 2] << 8) + + (arg->bytes[last_element + 1] << 16) + + (arg->bytes[last_element] << 24) + 1; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + ret = systeminfo_gpci_request(PROCESSOR_BUS_TOPOLOGY, starting_index, + 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: + put_cpu_var(hv_gpci_reqb); + return ret; +} + static DEVICE_ATTR_RO(kernel_version); static DEVICE_ATTR_RO(cpumask); @@ -118,6 +253,11 @@ static struct attribute *interface_attrs[] = { &hv_caps_attr_expanded.attr, &hv_caps_attr_lab.attr, &hv_caps_attr_collect_privileged.attr, + /* + * This NULL is a placeholder for the processor_bus_topology + * attribute, set in init function if applicable. 
+ */ + NULL, NULL, }; @@ -143,8 +283,6 @@ static const struct attribute_group *attr_groups[] = { NULL, }; -static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); - static unsigned long single_gpci_request(u32 req, u32 starting_index, u16 secondary_index, u8 version_in, u32 offset, u8 length, u64 *value) @@ -325,6 +463,41 @@ static int hv_gpci_cpu_hotplug_init(void) ppc_hv_gpci_cpu_offline); } +static void add_sysinfo_interface_files(void) +{ + struct device_attribute *attr = NULL; + unsigned long ret; + struct hv_gpci_request_buffer *arg; + + /* Check for counter request type PROCESSOR_BUS_TOPOLOGY support */ + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + arg->params.counter_request = cpu_to_be32(PROCESSOR_BUS_TOPOLOGY); + + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + put_cpu_var(hv_gpci_reqb); + + /* + * Add processor_bus_topology attribute in the interface_attrs + * attribute array, only for valid return types. 
+ */ + if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) { + attr = kzalloc(sizeof(*attr), GFP_KERNEL); + if (!attr) + return; + + sysfs_attr_init(&attr->attr); + attr->attr.name = "processor_bus_topology"; + attr->attr.mode = 0444; + attr->show = processor_bus_topology_show; + interface_attrs[INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr->attr; + } else + pr_devel("hcall failed, with error: 0x%lx\n", ret); +} + static int hv_gpci_init(void) { int r; @@ -388,6 +561,10 @@ static int hv_gpci_init(void) if (r) return r; + /* sysinfo interface files are only available for power10 and above platforms */ + if (PVR_VER(mfspr(SPRN_PVR)) >= PVR_POWER10) + add_sysinfo_interface_files(); + return 0; } From 9caf9e2b8bae58e39501f6fb1fc9189009538ccc Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 29 Jul 2023 13:04:47 +0530 Subject: [PATCH 052/135] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document processor_bus_topology sysfs interface file Add details of the new hv-gpci interface file called "processor_bus_topology" in the ABI documentation. Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230729073455.7918-3-kjain@linux.ibm.com --- .../sysfs-bus-event_source-devices-hv_gpci | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index 12e2bf92783fb..ba3f9aa3d68eb 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -80,3 +80,35 @@ Contact: Linux on PowerPC Developer List Description: read only This sysfs file exposes the cpumask which is designated to make HCALLs to retrieve hv-gpci pmu event counter data. 
+ +What: /sys/devices/hv_gpci/interface/processor_bus_topology +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + PROCESSOR_BUS_TOPOLOGY(0xD0). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY" can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer to getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. From 1a160c2a13c66c9ad47436e73c821f3d26818733 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 29 Jul 2023 13:04:48 +0530 Subject: [PATCH 053/135] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show processor config information The hcall H_GET_PERF_COUNTER_INFO with counter request value as PROCESSOR_CONFIG(0X90), can be used to get the system processor configuration information. To expose the system processor config information, patch adds sysfs file called "processor_config" to the "/sys/devices/hv_gpci/interface/" of hv_gpci pmu driver. Add enum and sysinfo_counter_request array to get required counter request value in hv-gpci.c file. 
Also add a new function called "sysinfo_device_attr_create", which will create and return required device attribute to the add_sysinfo_interface_files function. The processor_config sysfs file is only available for power10 and above platforms. Add a new macro called INTERFACE_PROCESSOR_CONFIG_ATTR, which points to the index of NULL placeholder, for processor_config attribute in the interface_attrs array. Also add macro INTERFACE_NULL_ATTR which points to index of NULL attribute in interface_attrs array. Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230729073455.7918-4-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 166 +++++++++++++++++++++++++++++++++--- 1 file changed, 152 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 7ac9cecf438d3..73f2ed42705a5 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -102,11 +102,21 @@ static ssize_t cpumask_show(struct device *dev, return cpumap_print_to_pagebuf(true, buf, &hv_gpci_cpumask); } -/* Counter request value to retrieve system information */ -#define PROCESSOR_BUS_TOPOLOGY 0XD0 - /* Interface attribute array index to store system information */ #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 +#define INTERFACE_PROCESSOR_CONFIG_ATTR 7 +#define INTERFACE_NULL_ATTR 8 + +/* Counter request value to retrieve system information */ +enum { + PROCESSOR_BUS_TOPOLOGY, + PROCESSOR_CONFIG +}; + +static int sysinfo_counter_request[] = { + [PROCESSOR_BUS_TOPOLOGY] = 0xD0, + [PROCESSOR_CONFIG] = 0x90, +}; static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); @@ -187,7 +197,8 @@ static ssize_t processor_bus_topology_show(struct device *dev, struct device_att * starting_index value implies the starting hardware * chip id. 
*/ - ret = systeminfo_gpci_request(PROCESSOR_BUS_TOPOLOGY, 0, 0, buf, &n, arg); + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], + 0, 0, buf, &n, arg); if (!ret) return n; @@ -220,8 +231,76 @@ static ssize_t processor_bus_topology_show(struct device *dev, struct device_att memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); - ret = systeminfo_gpci_request(PROCESSOR_BUS_TOPOLOGY, starting_index, - 0, buf, &n, arg); + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_BUS_TOPOLOGY], + starting_index, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: + put_cpu_var(hv_gpci_reqb); + return ret; +} + +static ssize_t processor_config_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * Pass the counter request value 0x90 corresponds to request + * type 'Processor_config', to retrieve + * the system processor information. + * starting_index value implies the starting hardware + * processor index. + */ + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], + 0, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which + * implies that buffer can't accommodate all information, and a partial buffer + * returned. To handle that, we need to take subsequent requests + * with next starting index to retrieve additional (missing) data. + * Below loop do subsequent hcalls with next starting index and add it + * to buffer util we get all the information. 
+ */ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* + * Since the starting index is part of counter_value + * buffer elements, use the starting index value in the last + * element and add 1 to subsequent hcalls. + */ + u32 starting_index = arg->bytes[last_element + 3] + + (arg->bytes[last_element + 2] << 8) + + (arg->bytes[last_element + 1] << 16) + + (arg->bytes[last_element] << 24) + 1; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + ret = systeminfo_gpci_request(sysinfo_counter_request[PROCESSOR_CONFIG], + starting_index, 0, buf, &n, arg); if (!ret) return n; @@ -258,6 +337,11 @@ static struct attribute *interface_attrs[] = { * attribute, set in init function if applicable. */ NULL, + /* + * This NULL is a placeholder for the processor_config + * attribute, set in init function if applicable. + */ + NULL, NULL, }; @@ -463,17 +547,24 @@ static int hv_gpci_cpu_hotplug_init(void) ppc_hv_gpci_cpu_offline); } -static void add_sysinfo_interface_files(void) +static struct device_attribute *sysinfo_device_attr_create(int + sysinfo_interface_group_index, u32 req) { struct device_attribute *attr = NULL; unsigned long ret; struct hv_gpci_request_buffer *arg; - /* Check for counter request type PROCESSOR_BUS_TOPOLOGY support */ + if (sysinfo_interface_group_index < INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR || + sysinfo_interface_group_index >= INTERFACE_NULL_ATTR) { + pr_info("Wrong interface group index for system information\n"); + return NULL; + } + + /* Check for given counter request value support */ arg = (void *)get_cpu_var(hv_gpci_reqb); memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); - arg->params.counter_request = cpu_to_be32(PROCESSOR_BUS_TOPOLOGY); + arg->params.counter_request = cpu_to_be32(req); ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); @@ -481,21 
+572,68 @@ static void add_sysinfo_interface_files(void) put_cpu_var(hv_gpci_reqb); /* - * Add processor_bus_topology attribute in the interface_attrs + * Add given counter request value attribute in the interface_attrs * attribute array, only for valid return types. */ if (!ret || ret == H_AUTHORITY || ret == H_PARAMETER) { attr = kzalloc(sizeof(*attr), GFP_KERNEL); if (!attr) - return; + return NULL; sysfs_attr_init(&attr->attr); - attr->attr.name = "processor_bus_topology"; attr->attr.mode = 0444; - attr->show = processor_bus_topology_show; - interface_attrs[INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr->attr; + + switch (sysinfo_interface_group_index) { + case INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR: + attr->attr.name = "processor_bus_topology"; + attr->show = processor_bus_topology_show; + break; + case INTERFACE_PROCESSOR_CONFIG_ATTR: + attr->attr.name = "processor_config"; + attr->show = processor_config_show; + break; + } } else pr_devel("hcall failed, with error: 0x%lx\n", ret); + + return attr; +} + +static void add_sysinfo_interface_files(void) +{ + int sysfs_count; + struct device_attribute *attr[INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR]; + int i; + + sysfs_count = INTERFACE_NULL_ATTR - INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR; + + /* Get device attribute for a given counter request value */ + for (i = 0; i < sysfs_count; i++) { + attr[i] = sysinfo_device_attr_create(i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR, + sysinfo_counter_request[i]); + + if (!attr[i]) + goto out; + } + + /* Add sysinfo interface attributes in the interface_attrs attribute array */ + for (i = 0; i < sysfs_count; i++) + interface_attrs[i + INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR] = &attr[i]->attr; + + return; + +out: + /* + * The sysinfo interface attributes will be added, only if hcall passed for + * all the counter request values. Free the device attribute array incase + * of any hcall failure. 
+ */ + if (i > 0) { + while (i >= 0) { + kfree(attr[i]); + i--; + } + } } static int hv_gpci_init(void) From 3255171d3e861b14e4248a5c676099819cadfb6d Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 29 Jul 2023 13:04:49 +0530 Subject: [PATCH 054/135] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document processor_config sysfs interface file Add details of the new hv-gpci interface file called "processor_config" in the ABI documentation. Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230729073455.7918-5-kjain@linux.ibm.com --- .../sysfs-bus-event_source-devices-hv_gpci | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index ba3f9aa3d68eb..9e81de18142ff 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -112,3 +112,35 @@ Description: admin read only more information. * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/processor_config +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + PROCESSOR_CONFIG(0x90). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY" can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. 
+ + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer to getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. From 71a7ccb478fcfe1495bcabf4972d859b24264df7 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 29 Jul 2023 13:04:50 +0530 Subject: [PATCH 055/135] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via virtual processor information The hcall H_GET_PERF_COUNTER_INFO with counter request value as AFFINITY_DOMAIN_INFORMATION_BY_VIRTUAL_PROCESSOR(0XA0), can be used to get the system affinity domain via virtual processor information. To expose the system affinity domain via virtual processor information, patch adds sysfs file called "affinity_domain_via_virtual_processor" to the "/sys/devices/hv_gpci/interface/" of hv_gpci pmu driver. The affinity_domain_via_virtual_processor sysfs file is only available for power10 and above platforms. Add a macro called INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR, which points to the index of NULL placeholder, for affinity_domain_via_virtual_processor attribute in interface_attrs array. Also updated the value of INTERFACE_NULL_ATTR macro in hv-gpci.c file. 
Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230729073455.7918-6-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 86 ++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 73f2ed42705a5..de2e1aee9e2cd 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -105,17 +105,20 @@ static ssize_t cpumask_show(struct device *dev, /* Interface attribute array index to store system information */ #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 #define INTERFACE_PROCESSOR_CONFIG_ATTR 7 -#define INTERFACE_NULL_ATTR 8 +#define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 +#define INTERFACE_NULL_ATTR 9 /* Counter request value to retrieve system information */ enum { PROCESSOR_BUS_TOPOLOGY, - PROCESSOR_CONFIG + PROCESSOR_CONFIG, + AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ }; static int sysinfo_counter_request[] = { [PROCESSOR_BUS_TOPOLOGY] = 0xD0, [PROCESSOR_CONFIG] = 0x90, + [AFFINITY_DOMAIN_VIA_VP] = 0xA0, }; static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); @@ -316,6 +319,76 @@ static ssize_t processor_config_show(struct device *dev, struct device_attribute return ret; } +static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * Pass the counter request 0xA0 corresponds to request + * type 'Affinity_domain_information_by_virutal_processor', + * to retrieve the system affinity domain information. + * starting_index value refers to the starting hardware + * processor index. 
+ */ + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], + 0, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which + * implies that buffer can't accommodate all information, and a partial buffer + * returned. To handle that, we need to take subsequent requests + * with next secondary index to retrieve additional (missing) data. + * Below loop do subsequent hcalls with next secondary index and add it + * to buffer util we get all the information. + */ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* + * Since the starting index and secondary index type is part of the + * counter_value buffer elements, use the starting index value in the + * last array element as subsequent starting index, and use secondary index + * value in the last array element plus 1 as subsequent secondary index. + * For counter request '0xA0', starting index points to partition id + * and secondary index points to corresponding virtual processor index. + */ + u32 starting_index = arg->bytes[last_element + 1] + (arg->bytes[last_element] << 8); + u16 secondary_index = arg->bytes[last_element + 3] + + (arg->bytes[last_element + 2] << 8) + 1; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_VP], + starting_index, secondary_index, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: + put_cpu_var(hv_gpci_reqb); + return ret; +} + static DEVICE_ATTR_RO(kernel_version); static DEVICE_ATTR_RO(cpumask); @@ -342,6 +415,11 @@ static struct attribute *interface_attrs[] = { * attribute, set in init function if applicable. 
*/ NULL, + /* + * This NULL is a placeholder for the affinity_domain_via_virtual_processor + * attribute, set in init function if applicable. + */ + NULL, NULL, }; @@ -592,6 +670,10 @@ static struct device_attribute *sysinfo_device_attr_create(int attr->attr.name = "processor_config"; attr->show = processor_config_show; break; + case INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR: + attr->attr.name = "affinity_domain_via_virtual_processor"; + attr->show = affinity_domain_via_virtual_processor_show; + break; } } else pr_devel("hcall failed, with error: 0x%lx\n", ret); From cc89ff3491b61cebc90b3394eb6b36635173d0dd Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 29 Jul 2023 13:04:51 +0530 Subject: [PATCH 056/135] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document affinity_domain_via_virtual_processor sysfs interface file Add details of the new hv-gpci interface file called "affinity_domain_via_virtual_processor" in the ABI documentation. Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230729073455.7918-7-kjain@linux.ibm.com --- .../sysfs-bus-event_source-devices-hv_gpci | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index 9e81de18142ff..5ee33218be833 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -144,3 +144,35 @@ Description: admin read only more information. * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/affinity_domain_via_virtual_processor +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. 
The HCALL is made with counter request value + AFFINITY_DOMAIN_INFORMATION_BY_VIRTUAL_PROCESSOR(0xA0). + + * This sysfs file will be created only for power10 and above platforms. + + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY" can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer to getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. From a69a57cac1ec8995bb0b571dfccc3fe2f046719a Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 29 Jul 2023 13:04:52 +0530 Subject: [PATCH 057/135] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via domain information The hcall H_GET_PERF_COUNTER_INFO with counter request value as AFFINITY_DOMAIN_INFORMATION_BY_DOMAIN(0XB0), can be used to get the system affinity domain via domain information. To expose the system affinity domain via domain information, patch adds sysfs file called "affinity_domain_via_domain" to the "/sys/devices/hv_gpci/interface/" of hv_gpci pmu driver. Add new entry for AFFINITY_DOMAIN_VIA_DOM in sysinfo_counter_request array, which points to the counter request value "affinity_domain_via_domain" in hv-gpci.c file. The affinity_domain_via_domain sysfs file is only available for power10 and above platforms. 
Add a macro called INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR, which points to the index of NULL placeholder, for affinity_domain_via_domain attribute in interface_attrs array. Also updated the value of INTERFACE_NULL_ATTR macro in hv-gpci.c file. Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230729073455.7918-8-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 80 ++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index de2e1aee9e2cd..3f765cdf9298e 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -106,19 +106,22 @@ static ssize_t cpumask_show(struct device *dev, #define INTERFACE_PROCESSOR_BUS_TOPOLOGY_ATTR 6 #define INTERFACE_PROCESSOR_CONFIG_ATTR 7 #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 -#define INTERFACE_NULL_ATTR 9 +#define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9 +#define INTERFACE_NULL_ATTR 10 /* Counter request value to retrieve system information */ enum { PROCESSOR_BUS_TOPOLOGY, PROCESSOR_CONFIG, AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ + AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */ }; static int sysinfo_counter_request[] = { [PROCESSOR_BUS_TOPOLOGY] = 0xD0, [PROCESSOR_CONFIG] = 0x90, [AFFINITY_DOMAIN_VIA_VP] = 0xA0, + [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, }; static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); @@ -389,6 +392,72 @@ static ssize_t affinity_domain_via_virtual_processor_show(struct device *dev, return ret; } +static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * Pass the counter request 0xB0 corresponds to request + * type 
'Affinity_domain_information_by_domain', + * to retrieve the system affinity domain information. + * starting_index value refers to the starting hardware + * processor index. + */ + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], + 0, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', which + * implies that buffer can't accommodate all information, and a partial buffer + * returned. To handle that, we need to take subsequent requests + * with next starting index to retrieve additional (missing) data. + * Below loop do subsequent hcalls with next starting index and add it + * to buffer util we get all the information. + */ + while (ret == H_PARAMETER) { + int returned_values = be16_to_cpu(arg->params.returned_values); + int elementsize = be16_to_cpu(arg->params.cv_element_size); + int last_element = (returned_values - 1) * elementsize; + + /* + * Since the starting index value is part of counter_value + * buffer elements, use the starting index value in the last + * element and add 1 to make subsequent hcalls. + */ + u32 starting_index = arg->bytes[last_element + 1] + + (arg->bytes[last_element] << 8) + 1; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + ret = systeminfo_gpci_request(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_DOM], + starting_index, 0, buf, &n, arg); + + if (!ret) + return n; + + if (ret != H_PARAMETER) + goto out; + } + + return n; + +out: + put_cpu_var(hv_gpci_reqb); + return ret; +} + static DEVICE_ATTR_RO(kernel_version); static DEVICE_ATTR_RO(cpumask); @@ -420,6 +489,11 @@ static struct attribute *interface_attrs[] = { * attribute, set in init function if applicable. */ NULL, + /* + * This NULL is a placeholder for the affinity_domain_via_domain + * attribute, set in init function if applicable. 
+ */ + NULL, NULL, }; @@ -674,6 +748,10 @@ static struct device_attribute *sysinfo_device_attr_create(int attr->attr.name = "affinity_domain_via_virtual_processor"; attr->show = affinity_domain_via_virtual_processor_show; break; + case INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR: + attr->attr.name = "affinity_domain_via_domain"; + attr->show = affinity_domain_via_domain_show; + break; } } else pr_devel("hcall failed, with error: 0x%lx\n", ret); From ab7e991286e729b0018722591c04eb2fd31771b0 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 29 Jul 2023 13:04:53 +0530 Subject: [PATCH 058/135] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document affinity_domain_via_domain sysfs interface file Add details of the new hv-gpci interface file called "affinity_domain_via_domain" in the ABI documentation. Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230729073455.7918-9-kjain@linux.ibm.com --- .../sysfs-bus-event_source-devices-hv_gpci | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index 5ee33218be833..399f0a2bd546f 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -176,3 +176,35 @@ Description: admin read only more information. * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/affinity_domain_via_domain +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + AFFINITY_DOMAIN_INFORMATION_BY_DOMAIN(0xB0). + + * This sysfs file will be created only for power10 and above platforms. 
+ + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY" can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer to getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. From a15e0d6a6929e737f71578ed4b05531fed5a96e8 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 29 Jul 2023 13:04:54 +0530 Subject: [PATCH 059/135] powerpc/hv_gpci: Add sysfs file inside hv_gpci device to show affinity domain via partition information The hcall H_GET_PERF_COUNTER_INFO with counter request value as AFFINITY_DOMAIN_INFORMATION_BY_PARTITION(0XB1), can be used to get the system affinity domain via partition information. To expose the system affinity domain via partition information, patch adds sysfs file called "affinity_domain_via_partition" to the "/sys/devices/hv_gpci/interface/" of hv_gpci pmu driver. Add new entry for AFFINITY_DOMAIN_VIA_PAR in sysinfo_counter_request array, which points to the counter request value "affinity_domain_via_partition" in hv-gpci.c file. Also add a new function called "affinity_domain_via_partition_result_parse" to parse the hcall result and store it in output buffer. The affinity_domain_via_partition sysfs file is only available for power10 and above platforms. 
Add a macro called INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR, which points to the index of NULL placeholder, for affinity_domain_via_partition attribute in interface_attrs array. Also updated the value of INTERFACE_NULL_ATTR macro in hv-gpci.c file. Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230729073455.7918-10-kjain@linux.ibm.com --- arch/powerpc/perf/hv-gpci.c | 160 +++++++++++++++++++++++++++++++++++- 1 file changed, 159 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c index 3f765cdf9298e..39dbe6b348df2 100644 --- a/arch/powerpc/perf/hv-gpci.c +++ b/arch/powerpc/perf/hv-gpci.c @@ -107,7 +107,8 @@ static ssize_t cpumask_show(struct device *dev, #define INTERFACE_PROCESSOR_CONFIG_ATTR 7 #define INTERFACE_AFFINITY_DOMAIN_VIA_VP_ATTR 8 #define INTERFACE_AFFINITY_DOMAIN_VIA_DOM_ATTR 9 -#define INTERFACE_NULL_ATTR 10 +#define INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR 10 +#define INTERFACE_NULL_ATTR 11 /* Counter request value to retrieve system information */ enum { @@ -115,6 +116,7 @@ enum { PROCESSOR_CONFIG, AFFINITY_DOMAIN_VIA_VP, /* affinity domain via virtual processor */ AFFINITY_DOMAIN_VIA_DOM, /* affinity domain via domain */ + AFFINITY_DOMAIN_VIA_PAR, /* affinity domain via partition */ }; static int sysinfo_counter_request[] = { @@ -122,6 +124,7 @@ static int sysinfo_counter_request[] = { [PROCESSOR_CONFIG] = 0x90, [AFFINITY_DOMAIN_VIA_VP] = 0xA0, [AFFINITY_DOMAIN_VIA_DOM] = 0xB0, + [AFFINITY_DOMAIN_VIA_PAR] = 0xB1, }; static DEFINE_PER_CPU(char, hv_gpci_reqb[HGPCI_REQ_BUFFER_SIZE]) __aligned(sizeof(uint64_t)); @@ -458,6 +461,152 @@ static ssize_t affinity_domain_via_domain_show(struct device *dev, struct device return ret; } +static void affinity_domain_via_partition_result_parse(int returned_values, + int element_size, char *buf, size_t *last_element, + size_t *n, struct hv_gpci_request_buffer *arg) +{ + size_t i = 0, j = 0; + size_t k, l, m; 
+ uint16_t total_affinity_domain_ele, size_of_each_affinity_domain_ele; + + /* + * hcall H_GET_PERF_COUNTER_INFO populates the 'returned_values' + * to show the total number of counter_value array elements + * returned via hcall. + * Unlike other request types, the data structure returned by this + * request is variable-size. For this counter request type, + * hcall populates 'cv_element_size' corresponds to minimum size of + * the structure returned i.e; the size of the structure with no domain + * information. Below loop go through all counter_value array + * to determine the number and size of each domain array element and + * add it to the output buffer. + */ + while (i < returned_values) { + k = j; + for (; k < j + element_size; k++) + *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); + *n += sprintf(buf + *n, "\n"); + + total_affinity_domain_ele = (u8)arg->bytes[k - 2] << 8 | (u8)arg->bytes[k - 3]; + size_of_each_affinity_domain_ele = (u8)arg->bytes[k] << 8 | (u8)arg->bytes[k - 1]; + + for (l = 0; l < total_affinity_domain_ele; l++) { + for (m = 0; m < size_of_each_affinity_domain_ele; m++) { + *n += sprintf(buf + *n, "%02x", (u8)arg->bytes[k]); + k++; + } + *n += sprintf(buf + *n, "\n"); + } + + *n += sprintf(buf + *n, "\n"); + i++; + j = k; + } + + *last_element = k; +} + +static ssize_t affinity_domain_via_partition_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct hv_gpci_request_buffer *arg; + unsigned long ret; + size_t n = 0; + size_t last_element = 0; + u32 starting_index; + + arg = (void *)get_cpu_var(hv_gpci_reqb); + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + + /* + * Pass the counter request value 0xB1 corresponds to counter request + * type 'Affinity_domain_information_by_partition', + * to retrieve the system affinity domain by partition information. + * starting_index value refers to the starting hardware + * processor index. 
+ */ + arg->params.counter_request = cpu_to_be32(sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); + arg->params.starting_index = cpu_to_be32(0); + + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + if (!ret) + goto parse_result; + + /* + * ret value as 'H_PARAMETER' implies that the current buffer size + * can't accommodate all the information, and a partial buffer + * returned. To handle that, we need to make subsequent requests + * with next starting index to retrieve additional (missing) data. + * Below loop do subsequent hcalls with next starting index and add it + * to buffer util we get all the information. + */ + while (ret == H_PARAMETER) { + affinity_domain_via_partition_result_parse( + be16_to_cpu(arg->params.returned_values) - 1, + be16_to_cpu(arg->params.cv_element_size), buf, + &last_element, &n, arg); + + if (n >= PAGE_SIZE) { + put_cpu_var(hv_gpci_reqb); + pr_debug("System information exceeds PAGE_SIZE\n"); + return -EFBIG; + } + + /* + * Since the starting index value is part of counter_value + * buffer elements, use the starting_index value in the last + * element and add 1 to make subsequent hcalls. 
+ */ + starting_index = (u8)arg->bytes[last_element] << 8 | + (u8)arg->bytes[last_element + 1]; + + memset(arg, 0, HGPCI_REQ_BUFFER_SIZE); + arg->params.counter_request = cpu_to_be32( + sysinfo_counter_request[AFFINITY_DOMAIN_VIA_PAR]); + arg->params.starting_index = cpu_to_be32(starting_index); + + ret = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO, + virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE); + + if (ret && (ret != H_PARAMETER)) + goto out; + } + +parse_result: + affinity_domain_via_partition_result_parse( + be16_to_cpu(arg->params.returned_values), + be16_to_cpu(arg->params.cv_element_size), + buf, &last_element, &n, arg); + + put_cpu_var(hv_gpci_reqb); + return n; + +out: + put_cpu_var(hv_gpci_reqb); + + /* + * ret value as 'H_PARAMETER' corresponds to 'GEN_BUF_TOO_SMALL', + * which means that the current buffer size cannot accommodate + * all the information and a partial buffer returned. + * hcall fails incase of ret value other than H_SUCCESS or H_PARAMETER. + * + * ret value as H_AUTHORITY implies that partition is not permitted to retrieve + * performance information, and required to set + * "Enable Performance Information Collection" option. + */ + if (ret == H_AUTHORITY) + return -EPERM; + + /* + * hcall can fail with other possible ret value like H_PRIVILEGE/H_HARDWARE + * because of invalid buffer-length/address or due to some hardware + * error. + */ + return -EIO; +} + static DEVICE_ATTR_RO(kernel_version); static DEVICE_ATTR_RO(cpumask); @@ -494,6 +643,11 @@ static struct attribute *interface_attrs[] = { * attribute, set in init function if applicable. */ NULL, + /* + * This NULL is a placeholder for the affinity_domain_via_partition + * attribute, set in init function if applicable. 
+ */ + NULL, NULL, }; @@ -752,6 +906,10 @@ static struct device_attribute *sysinfo_device_attr_create(int attr->attr.name = "affinity_domain_via_domain"; attr->show = affinity_domain_via_domain_show; break; + case INTERFACE_AFFINITY_DOMAIN_VIA_PAR_ATTR: + attr->attr.name = "affinity_domain_via_partition"; + attr->show = affinity_domain_via_partition_show; + break; } } else pr_devel("hcall failed, with error: 0x%lx\n", ret); From 8df99066940b6c82a0851f13adf653827dc524f7 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Sat, 29 Jul 2023 13:04:55 +0530 Subject: [PATCH 060/135] docs: ABI: sysfs-bus-event_source-devices-hv_gpci: Document affinity_domain_via_partition sysfs interface file Add details of the new hv-gpci interface file called "affinity_domain_via_partition" in the ABI documentation. Reviewed-by: Athira Rajeev Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230729073455.7918-11-kjain@linux.ibm.com --- .../sysfs-bus-event_source-devices-hv_gpci | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci index 399f0a2bd546f..40f7cd2405914 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_gpci @@ -208,3 +208,35 @@ Description: admin read only more information. * "-EFBIG" : System information exceeds PAGE_SIZE. + +What: /sys/devices/hv_gpci/interface/affinity_domain_via_partition +Date: July 2023 +Contact: Linux on PowerPC Developer List +Description: admin read only + This sysfs file exposes the system topology information by making HCALL + H_GET_PERF_COUNTER_INFO. The HCALL is made with counter request value + AFFINITY_DOMAIN_INFORMATION_BY_PARTITION(0xB1). + + * This sysfs file will be created only for power10 and above platforms. 
+ + * User needs root privileges to read data from this sysfs file. + + * This sysfs file will be created, only when the HCALL returns "H_SUCCESS", + "H_AUTHORITY" or "H_PARAMETER" as the return type. + + HCALL with return error type "H_AUTHORITY" can be resolved during + runtime by setting "Enable Performance Information Collection" option. + + * The end user reading this sysfs file must decode the content as per + underlying platform/firmware. + + Possible error codes while reading this sysfs file: + + * "-EPERM" : Partition is not permitted to retrieve performance information, + required to set "Enable Performance Information Collection" option. + + * "-EIO" : Can't retrieve system information because of invalid buffer length/invalid address + or because of some hardware error. Refer to getPerfCountInfo documentation for + more information. + + * "-EFBIG" : System information exceeds PAGE_SIZE. From 8f8f1cd67aa026c9dab8eb4e087e4a2d8fa9d5bc Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 1 Aug 2023 11:17:38 +1000 Subject: [PATCH 061/135] powerpc/watchpoints: Explain thread_change_pc() more The behaviour of the thread_change_pc() function is a bit cryptic without being more familiar with how the watchpoint logic handles perf's after-execute semantics. Expand the comment to explain why we can re-insert the breakpoint and unset the perf_single_step flag. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230801011744.153973-2-bgray@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index e1b4e70c8fd0f..bad2991f906b5 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -499,6 +499,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, * Restores the breakpoint on the debug registers. 
* Invoke this function if it is known that the execution context is * about to change to cause loss of MSR_SE settings. + * + * The perf watchpoint will simply re-trigger once the thread is started again, + * and the watchpoint handler will set up MSR_SE and perf_single_step as + * needed. */ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) { From 668a6ec6ed57f0248070c490aba75a9572e4b0a4 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 1 Aug 2023 11:17:39 +1000 Subject: [PATCH 062/135] powerpc/watchpoints: Don't track info persistently info is cheap to retrieve, and is likely optimised by the compiler anyway. On the other hand, propagating it across the functions makes it possible to be inconsistent and adds needless complexity. Remove it, and invoke counter_arch_bp() when we need to work with it. As we don't persist it, we just use the local bp array to track whether we are ignoring a breakpoint. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230801011744.153973-3-bgray@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 60 +++++++++++++++-------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index bad2991f906b5..e6749642604cc 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -538,23 +538,22 @@ static bool is_octword_vsx_instr(int type, int size) * We've failed in reliably handling the hw-breakpoint. Unregister * it and throw a warning message to let the user know about it. */ -static void handler_error(struct perf_event *bp, struct arch_hw_breakpoint *info) +static void handler_error(struct perf_event *bp) { WARN(1, "Unable to handle hardware breakpoint. 
Breakpoint at 0x%lx will be disabled.", - info->address); + counter_arch_bp(bp)->address); perf_event_disable_inatomic(bp); } -static void larx_stcx_err(struct perf_event *bp, struct arch_hw_breakpoint *info) +static void larx_stcx_err(struct perf_event *bp) { printk_ratelimited("Breakpoint hit on instruction that can't be emulated. Breakpoint at 0x%lx will be disabled.\n", - info->address); + counter_arch_bp(bp)->address); perf_event_disable_inatomic(bp); } static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, - struct arch_hw_breakpoint **info, int *hit, - ppc_inst_t instr) + int *hit, ppc_inst_t instr) { int i; int stepped; @@ -565,7 +564,7 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, if (!hit[i]) continue; current->thread.last_hit_ubp[i] = bp[i]; - info[i] = NULL; + bp[i] = NULL; } regs_set_return_msr(regs, regs->msr | MSR_SE); return false; @@ -576,15 +575,15 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - handler_error(bp[i], info[i]); - info[i] = NULL; + handler_error(bp[i]); + bp[i] = NULL; } return false; } return true; } -static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, +static void handle_p10dd1_spurious_exception(struct perf_event **bp, int *hit, unsigned long ea) { int i; @@ -596,10 +595,14 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, * spurious exception. 
*/ for (i = 0; i < nr_wp_slots(); i++) { - if (!info[i]) + struct arch_hw_breakpoint *info; + + if (!bp[i]) continue; - hw_end_addr = ALIGN(info[i]->address + info[i]->len, HW_BREAKPOINT_SIZE); + info = counter_arch_bp(bp[i]); + + hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE); /* * Ending address of DAWR range is less than starting @@ -629,9 +632,9 @@ static void handle_p10dd1_spurious_exception(struct arch_hw_breakpoint **info, return; for (i = 0; i < nr_wp_slots(); i++) { - if (info[i]) { + if (bp[i]) { hit[i] = 1; - info[i]->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; + counter_arch_bp(bp[i])->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ; } } } @@ -642,7 +645,6 @@ int hw_breakpoint_handler(struct die_args *args) int rc = NOTIFY_STOP; struct perf_event *bp[HBP_NUM_MAX] = { NULL }; struct pt_regs *regs = args->regs; - struct arch_hw_breakpoint *info[HBP_NUM_MAX] = { NULL }; int i; int hit[HBP_NUM_MAX] = {0}; int nr_hit = 0; @@ -667,18 +669,20 @@ int hw_breakpoint_handler(struct die_args *args) wp_get_instr_detail(regs, &instr, &type, &size, &ea); for (i = 0; i < nr_wp_slots(); i++) { + struct arch_hw_breakpoint *info; + bp[i] = __this_cpu_read(bp_per_reg[i]); if (!bp[i]) continue; - info[i] = counter_arch_bp(bp[i]); - info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; + info = counter_arch_bp(bp[i]); + info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ; - if (wp_check_constraints(regs, instr, ea, type, size, info[i])) { + if (wp_check_constraints(regs, instr, ea, type, size, info)) { if (!IS_ENABLED(CONFIG_PPC_8xx) && ppc_inst_equal(instr, ppc_inst(0))) { - handler_error(bp[i], info[i]); - info[i] = NULL; + handler_error(bp[i]); + bp[i] = NULL; err = 1; continue; } @@ -697,7 +701,7 @@ int hw_breakpoint_handler(struct die_args *args) /* Workaround for Power10 DD1 */ if (!IS_ENABLED(CONFIG_PPC_8xx) && mfspr(SPRN_PVR) == 0x800100 && is_octword_vsx_instr(type, size)) { - handle_p10dd1_spurious_exception(info, hit, ea); + handle_p10dd1_spurious_exception(bp, hit, ea); } else { rc 
= NOTIFY_DONE; goto out; @@ -715,7 +719,7 @@ int hw_breakpoint_handler(struct die_args *args) if (!hit[i]) continue; perf_bp_event(bp[i], regs); - info[i] = NULL; + bp[i] = NULL; } rc = NOTIFY_DONE; goto reset; @@ -726,13 +730,13 @@ int hw_breakpoint_handler(struct die_args *args) for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - larx_stcx_err(bp[i], info[i]); - info[i] = NULL; + larx_stcx_err(bp[i]); + bp[i] = NULL; } goto reset; } - if (!stepping_handler(regs, bp, info, hit, instr)) + if (!stepping_handler(regs, bp, hit, instr)) goto reset; } @@ -743,15 +747,15 @@ int hw_breakpoint_handler(struct die_args *args) for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - if (!(info[i]->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) + if (!(counter_arch_bp(bp[i])->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp[i], regs); } reset: for (i = 0; i < nr_wp_slots(); i++) { - if (!info[i]) + if (!bp[i]) continue; - __set_breakpoint(i, info[i]); + __set_breakpoint(i, counter_arch_bp(bp[i])); } out: From 1e60f3564bad09962646bf8c2af588ecf518d337 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 1 Aug 2023 11:17:40 +1000 Subject: [PATCH 063/135] powerpc/watchpoints: Track perf single step directly on the breakpoint There is a bug in the current watchpoint tracking logic, where the teardown in arch_unregister_hw_breakpoint() uses bp->ctx->task, which it does not have a reference of and parallel threads may be in the process of destroying. This was partially addressed in commit fb822e6076d9 ("powerpc/hw_breakpoint: Fix oops when destroying hw_breakpoint event"), but the underlying issue of accessing a struct member in an unknown state still remained. Syzkaller managed to trigger a null pointer dereference due to the race between the task destructor and checking the pointer and dereferencing it in the loop. 
While this null pointer dereference could be fixed by using READ_ONCE to access the task up front, that just changes the error to manipulating possibly freed memory. Instead, the breakpoint logic needs to be reworked to remove any dependency on a context or task struct during breakpoint removal. The reason we have this currently is to clear thread.last_hit_ubp. This member is used to differentiate the perf DAWR single-step sequence from other causes of single-step, such as userspace just calling ptrace(PTRACE_SINGLESTEP, ...). We need to differentiate them because, when the single step interrupt is received, we need to know whether to re-insert the DAWR breakpoint (perf) or not (ptrace / other). arch_unregister_hw_breakpoint() needs to clear this information to prevent dangling pointers to possibly freed memory. These pointers are dereferenced in single_step_dabr_instruction() without a way to check their validity. This patch moves the tracking of this information to the breakpoint itself. This means we no longer have to do anything special to clean up. 
Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230801011744.153973-4-bgray@linux.ibm.com --- arch/powerpc/include/asm/hw_breakpoint.h | 1 + arch/powerpc/include/asm/processor.h | 5 -- arch/powerpc/kernel/hw_breakpoint.c | 69 ++++++++---------------- 3 files changed, 23 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h index 84d39fd42f711..66db0147d5b42 100644 --- a/arch/powerpc/include/asm/hw_breakpoint.h +++ b/arch/powerpc/include/asm/hw_breakpoint.h @@ -18,6 +18,7 @@ struct arch_hw_breakpoint { u16 len; /* length of the target data symbol */ u16 hw_len; /* length programmed in hw */ u8 flags; + bool perf_single_step; /* temporarily uninstalled for a perf single step */ }; /* Note: Don't change the first 6 bits below as they are in the same order diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8a6754ffdc7ea..9e67cb1c72e93 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -172,11 +172,6 @@ struct thread_struct { unsigned int align_ctl; /* alignment handling control */ #ifdef CONFIG_HAVE_HW_BREAKPOINT struct perf_event *ptrace_bps[HBP_NUM_MAX]; - /* - * Helps identify source of single-step exception and subsequent - * hw-breakpoint enablement - */ - struct perf_event *last_hit_ubp[HBP_NUM_MAX]; #endif /* CONFIG_HAVE_HW_BREAKPOINT */ struct arch_hw_breakpoint hw_brk[HBP_NUM_MAX]; /* hardware breakpoint info */ unsigned long trap_nr; /* last trap # on this thread */ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index e6749642604cc..624375c188820 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -43,16 +43,6 @@ int hw_breakpoint_slots(int type) return 0; /* no instruction breakpoints available */ } -static bool single_step_pending(void) -{ - int i; - - for (i = 0; i < 
nr_wp_slots(); i++) { - if (current->thread.last_hit_ubp[i]) - return true; - } - return false; -} /* * Install a perf counter breakpoint. @@ -84,7 +74,7 @@ int arch_install_hw_breakpoint(struct perf_event *bp) * Do not install DABR values if the instruction must be single-stepped. * If so, DABR will be populated in single_step_dabr_instruction(). */ - if (!single_step_pending()) + if (!info->perf_single_step) __set_breakpoint(i, info); return 0; @@ -371,28 +361,6 @@ void arch_release_bp_slot(struct perf_event *bp) } } -/* - * Perform cleanup of arch-specific counters during unregistration - * of the perf-event - */ -void arch_unregister_hw_breakpoint(struct perf_event *bp) -{ - /* - * If the breakpoint is unregistered between a hw_breakpoint_handler() - * and the single_step_dabr_instruction(), then cleanup the breakpoint - * restoration variables to prevent dangling pointers. - * FIXME, this should not be using bp->ctx at all! Sayeth peterz. - */ - if (bp->ctx && bp->ctx->task && bp->ctx->task != ((void *)-1L)) { - int i; - - for (i = 0; i < nr_wp_slots(); i++) { - if (bp->ctx->task->thread.last_hit_ubp[i] == bp) - bp->ctx->task->thread.last_hit_ubp[i] = NULL; - } - } -} - /* * Check for virtual address in kernel space. 
*/ @@ -510,7 +478,9 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) int i; for (i = 0; i < nr_wp_slots(); i++) { - if (unlikely(tsk->thread.last_hit_ubp[i])) + struct perf_event *bp = __this_cpu_read(bp_per_reg[i]); + + if (unlikely(bp && counter_arch_bp(bp)->perf_single_step)) goto reset; } return; @@ -520,7 +490,7 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs) for (i = 0; i < nr_wp_slots(); i++) { info = counter_arch_bp(__this_cpu_read(bp_per_reg[i])); __set_breakpoint(i, info); - tsk->thread.last_hit_ubp[i] = NULL; + info->perf_single_step = false; } } @@ -563,7 +533,8 @@ static bool stepping_handler(struct pt_regs *regs, struct perf_event **bp, for (i = 0; i < nr_wp_slots(); i++) { if (!hit[i]) continue; - current->thread.last_hit_ubp[i] = bp[i]; + + counter_arch_bp(bp[i])->perf_single_step = true; bp[i] = NULL; } regs_set_return_msr(regs, regs->msr | MSR_SE); @@ -770,24 +741,28 @@ NOKPROBE_SYMBOL(hw_breakpoint_handler); static int single_step_dabr_instruction(struct die_args *args) { struct pt_regs *regs = args->regs; - struct perf_event *bp = NULL; - struct arch_hw_breakpoint *info; - int i; bool found = false; /* * Check if we are single-stepping as a result of a * previous HW Breakpoint exception */ - for (i = 0; i < nr_wp_slots(); i++) { - bp = current->thread.last_hit_ubp[i]; + for (int i = 0; i < nr_wp_slots(); i++) { + struct perf_event *bp; + struct arch_hw_breakpoint *info; + + bp = __this_cpu_read(bp_per_reg[i]); if (!bp) continue; - found = true; info = counter_arch_bp(bp); + if (!info->perf_single_step) + continue; + + found = true; + /* * We shall invoke the user-defined callback function in the * single stepping handler to confirm to 'trigger-after-execute' @@ -795,19 +770,19 @@ static int single_step_dabr_instruction(struct die_args *args) */ if (!(info->type & HW_BRK_TYPE_EXTRANEOUS_IRQ)) perf_bp_event(bp, regs); - current->thread.last_hit_ubp[i] = NULL; + + info->perf_single_step = false; } if 
(!found) return NOTIFY_DONE; - for (i = 0; i < nr_wp_slots(); i++) { - bp = __this_cpu_read(bp_per_reg[i]); + for (int i = 0; i < nr_wp_slots(); i++) { + struct perf_event *bp = __this_cpu_read(bp_per_reg[i]); if (!bp) continue; - info = counter_arch_bp(bp); - __set_breakpoint(i, info); + __set_breakpoint(i, counter_arch_bp(bp)); } /* From 5a2d8b9c06712b52b2f0f2fc9a144242277fda74 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 1 Aug 2023 11:17:41 +1000 Subject: [PATCH 064/135] powerpc/watchpoints: Simplify watchpoint reinsertion We only remove watchpoints when they have the perf_single_step flag set, so we can reinsert them during the first iteration. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230801011744.153973-5-bgray@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 624375c188820..bf8dda1a7e040 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -772,16 +772,6 @@ static int single_step_dabr_instruction(struct die_args *args) perf_bp_event(bp, regs); info->perf_single_step = false; - } - - if (!found) - return NOTIFY_DONE; - - for (int i = 0; i < nr_wp_slots(); i++) { - struct perf_event *bp = __this_cpu_read(bp_per_reg[i]); - if (!bp) - continue; - __set_breakpoint(i, counter_arch_bp(bp)); } @@ -789,7 +779,7 @@ static int single_step_dabr_instruction(struct die_args *args) * If the process was being single-stepped by ptrace, let the * other single-step actions occur (e.g. generate SIGTRAP). 
*/ - if (test_thread_flag(TIF_SINGLESTEP)) + if (!found || test_thread_flag(TIF_SINGLESTEP)) return NOTIFY_DONE; return NOTIFY_STOP; From bd29813ae10698f7bdfb3c68eacbb6464ec701ff Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 1 Aug 2023 11:17:42 +1000 Subject: [PATCH 065/135] powerpc/watchpoints: Remove ptrace/perf exclusion tracking ptrace and perf watchpoints were considered incompatible in commit 29da4f91c0c1 ("powerpc/watchpoint: Don't allow concurrent perf and ptrace events"), but the logic in that commit doesn't really apply. Ptrace doesn't automatically single step; the ptracer must request this explicitly. And the ptracer can do so regardless of whether a ptrace/perf watchpoint triggered or not: it could single step every instruction if it wanted to. Whatever stopped the ptracee before executing the instruction that would trigger the perf watchpoint is no longer relevant by this point. To get correct behaviour when perf and ptrace are watching the same data we must ignore the perf watchpoint. After all, ptrace has before-execute semantics, and perf is after-execute, so perf doesn't actually care about the watchpoint trigger at this point in time. Pausing before execution does not mean we will actually end up executing the instruction. Importantly though, we don't remove the perf watchpoint yet. This is key. The ptracer is free to do whatever it likes right now. E.g., it can continue the process, single step, or even set the child PC somewhere completely different. If it does try to execute the instruction though, without reinserting the watchpoint (in which case we go back to the start of this example), the perf watchpoint would immediately trigger. This time there is no ptrace watchpoint, so we can safely perform a single step and increment the perf counter. Upon receiving the single step exception, the existing code already handles propagating or consuming it based on whether another subsystem (e.g. ptrace) requested a single step. 
Again, this is needed with or without perf/ptrace exclusion, because ptrace could be single stepping this instruction regardless of if a watchpoint is involved. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230801011744.153973-6-bgray@linux.ibm.com --- arch/powerpc/kernel/hw_breakpoint.c | 249 +--------------------------- 1 file changed, 1 insertion(+), 248 deletions(-) diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index bf8dda1a7e040..b8513dc3e53ac 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -114,253 +114,6 @@ static bool is_ptrace_bp(struct perf_event *bp) return bp->overflow_handler == ptrace_triggered; } -struct breakpoint { - struct list_head list; - struct perf_event *bp; - bool ptrace_bp; -}; - -/* - * While kernel/events/hw_breakpoint.c does its own synchronization, we cannot - * rely on it safely synchronizing internals here; however, we can rely on it - * not requesting more breakpoints than available. 
- */ -static DEFINE_SPINLOCK(cpu_bps_lock); -static DEFINE_PER_CPU(struct breakpoint *, cpu_bps[HBP_NUM_MAX]); -static DEFINE_SPINLOCK(task_bps_lock); -static LIST_HEAD(task_bps); - -static struct breakpoint *alloc_breakpoint(struct perf_event *bp) -{ - struct breakpoint *tmp; - - tmp = kzalloc(sizeof(*tmp), GFP_KERNEL); - if (!tmp) - return ERR_PTR(-ENOMEM); - tmp->bp = bp; - tmp->ptrace_bp = is_ptrace_bp(bp); - return tmp; -} - -static bool bp_addr_range_overlap(struct perf_event *bp1, struct perf_event *bp2) -{ - __u64 bp1_saddr, bp1_eaddr, bp2_saddr, bp2_eaddr; - - bp1_saddr = ALIGN_DOWN(bp1->attr.bp_addr, HW_BREAKPOINT_SIZE); - bp1_eaddr = ALIGN(bp1->attr.bp_addr + bp1->attr.bp_len, HW_BREAKPOINT_SIZE); - bp2_saddr = ALIGN_DOWN(bp2->attr.bp_addr, HW_BREAKPOINT_SIZE); - bp2_eaddr = ALIGN(bp2->attr.bp_addr + bp2->attr.bp_len, HW_BREAKPOINT_SIZE); - - return (bp1_saddr < bp2_eaddr && bp1_eaddr > bp2_saddr); -} - -static bool alternate_infra_bp(struct breakpoint *b, struct perf_event *bp) -{ - return is_ptrace_bp(bp) ? !b->ptrace_bp : b->ptrace_bp; -} - -static bool can_co_exist(struct breakpoint *b, struct perf_event *bp) -{ - return !(alternate_infra_bp(b, bp) && bp_addr_range_overlap(b->bp, bp)); -} - -static int task_bps_add(struct perf_event *bp) -{ - struct breakpoint *tmp; - - tmp = alloc_breakpoint(bp); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - spin_lock(&task_bps_lock); - list_add(&tmp->list, &task_bps); - spin_unlock(&task_bps_lock); - return 0; -} - -static void task_bps_remove(struct perf_event *bp) -{ - struct list_head *pos, *q; - - spin_lock(&task_bps_lock); - list_for_each_safe(pos, q, &task_bps) { - struct breakpoint *tmp = list_entry(pos, struct breakpoint, list); - - if (tmp->bp == bp) { - list_del(&tmp->list); - kfree(tmp); - break; - } - } - spin_unlock(&task_bps_lock); -} - -/* - * If any task has breakpoint from alternate infrastructure, - * return true. Otherwise return false. 
- */ -static bool all_task_bps_check(struct perf_event *bp) -{ - struct breakpoint *tmp; - bool ret = false; - - spin_lock(&task_bps_lock); - list_for_each_entry(tmp, &task_bps, list) { - if (!can_co_exist(tmp, bp)) { - ret = true; - break; - } - } - spin_unlock(&task_bps_lock); - return ret; -} - -/* - * If same task has breakpoint from alternate infrastructure, - * return true. Otherwise return false. - */ -static bool same_task_bps_check(struct perf_event *bp) -{ - struct breakpoint *tmp; - bool ret = false; - - spin_lock(&task_bps_lock); - list_for_each_entry(tmp, &task_bps, list) { - if (tmp->bp->hw.target == bp->hw.target && - !can_co_exist(tmp, bp)) { - ret = true; - break; - } - } - spin_unlock(&task_bps_lock); - return ret; -} - -static int cpu_bps_add(struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - struct breakpoint *tmp; - int i = 0; - - tmp = alloc_breakpoint(bp); - if (IS_ERR(tmp)) - return PTR_ERR(tmp); - - spin_lock(&cpu_bps_lock); - cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (!cpu_bp[i]) { - cpu_bp[i] = tmp; - break; - } - } - spin_unlock(&cpu_bps_lock); - return 0; -} - -static void cpu_bps_remove(struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - int i = 0; - - spin_lock(&cpu_bps_lock); - cpu_bp = per_cpu_ptr(cpu_bps, bp->cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (!cpu_bp[i]) - continue; - - if (cpu_bp[i]->bp == bp) { - kfree(cpu_bp[i]); - cpu_bp[i] = NULL; - break; - } - } - spin_unlock(&cpu_bps_lock); -} - -static bool cpu_bps_check(int cpu, struct perf_event *bp) -{ - struct breakpoint **cpu_bp; - bool ret = false; - int i; - - spin_lock(&cpu_bps_lock); - cpu_bp = per_cpu_ptr(cpu_bps, cpu); - for (i = 0; i < nr_wp_slots(); i++) { - if (cpu_bp[i] && !can_co_exist(cpu_bp[i], bp)) { - ret = true; - break; - } - } - spin_unlock(&cpu_bps_lock); - return ret; -} - -static bool all_cpu_bps_check(struct perf_event *bp) -{ - int cpu; - - for_each_online_cpu(cpu) { - if 
(cpu_bps_check(cpu, bp)) - return true; - } - return false; -} - -int arch_reserve_bp_slot(struct perf_event *bp) -{ - int ret; - - /* ptrace breakpoint */ - if (is_ptrace_bp(bp)) { - if (all_cpu_bps_check(bp)) - return -ENOSPC; - - if (same_task_bps_check(bp)) - return -ENOSPC; - - return task_bps_add(bp); - } - - /* perf breakpoint */ - if (is_kernel_addr(bp->attr.bp_addr)) - return 0; - - if (bp->hw.target && bp->cpu == -1) { - if (same_task_bps_check(bp)) - return -ENOSPC; - - return task_bps_add(bp); - } else if (!bp->hw.target && bp->cpu != -1) { - if (all_task_bps_check(bp)) - return -ENOSPC; - - return cpu_bps_add(bp); - } - - if (same_task_bps_check(bp)) - return -ENOSPC; - - ret = cpu_bps_add(bp); - if (ret) - return ret; - ret = task_bps_add(bp); - if (ret) - cpu_bps_remove(bp); - - return ret; -} - -void arch_release_bp_slot(struct perf_event *bp) -{ - if (!is_kernel_addr(bp->attr.bp_addr)) { - if (bp->hw.target) - task_bps_remove(bp); - if (bp->cpu != -1) - cpu_bps_remove(bp); - } -} - /* * Check for virtual address in kernel space. */ @@ -687,7 +440,7 @@ int hw_breakpoint_handler(struct die_args *args) */ if (ptrace_bp) { for (i = 0; i < nr_wp_slots(); i++) { - if (!hit[i]) + if (!hit[i] || !is_ptrace_bp(bp[i])) continue; perf_bp_event(bp[i], regs); bp[i] = NULL; From 58709f6fc327a997daeeca77aa5e6bd4d4c238cf Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 1 Aug 2023 11:17:43 +1000 Subject: [PATCH 066/135] selftests/powerpc/ptrace: Update ptrace-perf watchpoint selftest Now that ptrace and perf are no longer exclusive, update the test to exercise interesting interactions. An assembly file is used for the children to allow precise instruction choice and addresses, while avoiding any compiler quirks. 
Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230801011744.153973-7-bgray@linux.ibm.com --- .../testing/selftests/powerpc/ptrace/Makefile | 1 + .../powerpc/ptrace/ptrace-perf-asm.S | 33 + .../powerpc/ptrace/ptrace-perf-hwbreak.c | 882 +++++++----------- 3 files changed, 368 insertions(+), 548 deletions(-) create mode 100644 tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile index cbeeaeae8837a..1b39b86849da5 100644 --- a/tools/testing/selftests/powerpc/ptrace/Makefile +++ b/tools/testing/selftests/powerpc/ptrace/Makefile @@ -36,6 +36,7 @@ $(TM_TESTS): CFLAGS += -I../tm -mhtm CFLAGS += $(KHDR_INCLUDES) -fno-pie $(OUTPUT)/ptrace-gpr: ptrace-gpr.S +$(OUTPUT)/ptrace-perf-hwbreak: ptrace-perf-asm.S $(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread $(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S new file mode 100644 index 0000000000000..9aa2e58f3189b --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ + +#include + +.global same_watch_addr_load +.global same_watch_addr_trap + +FUNC_START(same_watch_addr_child) + nop +same_watch_addr_load: + ld 0,0(3) + nop +same_watch_addr_trap: + trap + blr +FUNC_END(same_watch_addr_child) + + +.global perf_then_ptrace_load1 +.global perf_then_ptrace_load2 +.global perf_then_ptrace_trap + +FUNC_START(perf_then_ptrace_child) + nop +perf_then_ptrace_load1: + ld 0,0(3) +perf_then_ptrace_load2: + ld 0,0(4) + nop +perf_then_ptrace_trap: + trap + blr +FUNC_END(perf_then_ptrace_child) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c index 
d8a9e95fc03de..a0a0b9bb58543 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c @@ -1,659 +1,445 @@ // SPDX-License-Identifier: GPL-2.0+ -#include -#include -#include -#include -#include -#include -#include -#include + #include -#include +#include +#include +#include +#include #include -#include "ptrace.h" -char data[16]; +#include "utils.h" -/* Overlapping address range */ -volatile __u64 *ptrace_data1 = (__u64 *)&data[0]; -volatile __u64 *perf_data1 = (__u64 *)&data[4]; +/* + * Child subroutine that performs a load on the address, then traps + */ +void same_watch_addr_child(unsigned long *addr); -/* Non-overlapping address range */ -volatile __u64 *ptrace_data2 = (__u64 *)&data[0]; -volatile __u64 *perf_data2 = (__u64 *)&data[8]; +/* Address of the ld instruction in same_watch_addr_child() */ +extern char same_watch_addr_load[]; -static unsigned long pid_max_addr(void) -{ - FILE *fp; - char *line, *c; - char addr[100]; - size_t len = 0; - - fp = fopen("/proc/kallsyms", "r"); - if (!fp) { - printf("Failed to read /proc/kallsyms. Exiting..\n"); - exit(EXIT_FAILURE); - } +/* Address of the end trap instruction in same_watch_addr_child() */ +extern char same_watch_addr_trap[]; - while (getline(&line, &len, fp) != -1) { - if (!strstr(line, "pid_max") || strstr(line, "pid_max_max") || - strstr(line, "pid_max_min")) - continue; +/* + * Child subroutine that performs a load on the first address, then a load on + * the second address (with no instructions separating this from the first + * load), then traps. + */ +void perf_then_ptrace_child(unsigned long *first_addr, unsigned long *second_addr); - strncpy(addr, line, len < 100 ? len : 100); - c = strchr(addr, ' '); - *c = '\0'; - return strtoul(addr, &c, 16); - } - fclose(fp); - printf("Could not find pid_max. 
Exiting..\n"); - exit(EXIT_FAILURE); - return -1; -} +/* Address of the first ld instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_load1[]; -static void perf_user_event_attr_set(struct perf_event_attr *attr, __u64 addr, __u64 len) -{ - memset(attr, 0, sizeof(struct perf_event_attr)); - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(struct perf_event_attr); - attr->bp_type = HW_BREAKPOINT_R; - attr->bp_addr = addr; - attr->bp_len = len; - attr->exclude_kernel = 1; - attr->exclude_hv = 1; -} +/* Address of the second ld instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_load2[]; -static void perf_kernel_event_attr_set(struct perf_event_attr *attr) +/* Address of the end trap instruction in perf_then_ptrace_child() */ +extern char perf_then_ptrace_trap[]; + +static inline long sys_ptrace(long request, pid_t pid, unsigned long addr, unsigned long data) { - memset(attr, 0, sizeof(struct perf_event_attr)); - attr->type = PERF_TYPE_BREAKPOINT; - attr->size = sizeof(struct perf_event_attr); - attr->bp_type = HW_BREAKPOINT_R; - attr->bp_addr = pid_max_addr(); - attr->bp_len = sizeof(unsigned long); - attr->exclude_user = 1; - attr->exclude_hv = 1; + return syscall(__NR_ptrace, request, pid, addr, data); } -static int perf_cpu_event_open(int cpu, __u64 addr, __u64 len) +static long ptrace_traceme(void) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + return sys_ptrace(PTRACE_TRACEME, 0, 0, 0); } -static int perf_thread_event_open(pid_t child_pid, __u64 addr, __u64 len) +static long ptrace_getregs(pid_t pid, struct pt_regs *result) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); + return sys_ptrace(PTRACE_GETREGS, pid, 0, (unsigned long)result); } -static int perf_thread_cpu_event_open(pid_t child_pid, int cpu, __u64 addr, 
__u64 len) +static long ptrace_setregs(pid_t pid, struct pt_regs *result) { - struct perf_event_attr attr; - - perf_user_event_attr_set(&attr, addr, len); - return syscall(__NR_perf_event_open, &attr, child_pid, cpu, -1, 0); + return sys_ptrace(PTRACE_SETREGS, pid, 0, (unsigned long)result); } -static int perf_thread_kernel_event_open(pid_t child_pid) +static long ptrace_cont(pid_t pid, long signal) { - struct perf_event_attr attr; - - perf_kernel_event_attr_set(&attr); - return syscall(__NR_perf_event_open, &attr, child_pid, -1, -1, 0); + return sys_ptrace(PTRACE_CONT, pid, 0, signal); } -static int perf_cpu_kernel_event_open(int cpu) +static long ptrace_singlestep(pid_t pid, long signal) { - struct perf_event_attr attr; - - perf_kernel_event_attr_set(&attr); - return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0); + return sys_ptrace(PTRACE_SINGLESTEP, pid, 0, signal); } -static int child(void) +static long ppc_ptrace_gethwdbginfo(pid_t pid, struct ppc_debug_info *dbginfo) { - int ret; - - ret = ptrace(PTRACE_TRACEME, 0, NULL, 0); - if (ret) { - printf("Error: PTRACE_TRACEME failed\n"); - return 0; - } - kill(getpid(), SIGUSR1); /* --> parent (SIGUSR1) */ - - return 0; + return sys_ptrace(PPC_PTRACE_GETHWDBGINFO, pid, 0, (unsigned long)dbginfo); } -static void ptrace_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, - __u64 addr, int len) +static long ppc_ptrace_sethwdbg(pid_t pid, struct ppc_hw_breakpoint *bp_info) { - info->version = 1; - info->trigger_type = type; - info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; - info->addr = addr; - info->addr2 = addr + len; - info->condition_value = 0; - if (!len) - info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; - else - info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; + return sys_ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, (unsigned long)bp_info); } -static int ptrace_open(pid_t child_pid, __u64 wp_addr, int len) +static long ppc_ptrace_delhwdbg(pid_t pid, int bp_id) { - struct ppc_hw_breakpoint info; 
- - ptrace_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len); - return ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info); + return sys_ptrace(PPC_PTRACE_DELHWDEBUG, pid, 0L, bp_id); } -static int test1(pid_t child_pid) +static long ptrace_getreg_pc(pid_t pid, void **pc) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing cpu event by perf) - * if (addr range overlaps) - * fail; - */ + struct pt_regs regs; + long err; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) - return -1; + err = ptrace_getregs(pid, ®s); + if (err) + return err; - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd > 0 || errno != ENOSPC) - ret = -1; + *pc = (void *)regs.nip; - close(perf_fd); - return ret; + return 0; } -static int test2(pid_t child_pid) +static long ptrace_setreg_pc(pid_t pid, void *pc) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing cpu event by perf) - * if (addr range does not overlaps) - * allow; - */ + struct pt_regs regs; + long err; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) - return -1; + err = ptrace_getregs(pid, ®s); + if (err) + return err; - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); + regs.nip = (unsigned long)pc; -perf_close: - close(perf_fd); - return ret; -} + err = ptrace_setregs(pid, ®s); + if (err) + return err; -static int test3(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the same thread) - * if (addr range overlaps) - * fail; - */ - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, - 
sizeof(*perf_data1)); - if (perf_fd < 0) - return -1; - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd > 0 || errno != ENOSPC) - ret = -1; - - close(perf_fd); - return ret; + return 0; } -static int test4(pid_t child_pid) +static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, + int group_fd, unsigned long flags) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the same thread) - * if (addr range does not overlaps) - * fail; - */ - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, - sizeof(*perf_data2)); - if (perf_fd < 0) - return -1; - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - -perf_close: - close(perf_fd); - return ret; + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); } -static int test5(pid_t child_pid) +static void perf_user_event_attr_set(struct perf_event_attr *attr, void *addr, u64 len) { - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread event by ptrace) - * if (existing thread event by perf on the different thread) - * allow; - */ - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } - - perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) { - ret = -1; - goto perf_close; - } + memset(attr, 0, sizeof(struct perf_event_attr)); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); -perf_close: - close(perf_fd); -kill_child: - kill(cpid, SIGINT); - return ret; + attr->type = PERF_TYPE_BREAKPOINT; + attr->size = sizeof(struct 
perf_event_attr); + attr->bp_type = HW_BREAKPOINT_R; + attr->bp_addr = (u64)addr; + attr->bp_len = len; + attr->exclude_kernel = 1; + attr->exclude_hv = 1; } -static int test6(pid_t child_pid) +static int perf_watchpoint_open(pid_t child_pid, void *addr, u64 len) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread kernel event by perf) - * if (existing thread event by ptrace on the same thread) - * allow; - * -- OR -- - * if (new per cpu kernel event by perf) - * if (existing thread event by ptrace) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - perf_fd = perf_thread_kernel_event_open(child_pid); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); - - perf_fd = perf_cpu_kernel_event_open(0); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); + struct perf_event_attr attr; -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + perf_user_event_attr_set(&attr, addr, len); + return perf_event_open(&attr, child_pid, -1, -1, 0); } -static int test7(pid_t child_pid) +static int perf_read_counter(int perf_fd, u64 *count) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; + /* + * A perf counter is retrieved by the read() syscall. 
It contains + * the current count as 8 bytes that are interpreted as a u64 */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + ssize_t len = read(perf_fd, count, sizeof(*count)); - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; + if (len != sizeof(*count)) + return -1; - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test8(pid_t child_pid) +static void ppc_ptrace_init_breakpoint(struct ppc_hw_breakpoint *info, + int type, void *addr, int len) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlaps) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; - - perf_fd = perf_thread_event_open(child_pid, (__u64)perf_data2, - sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); - -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + info->version = 1; + info->trigger_type = type; + info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE; + info->addr = (u64)addr; + info->addr2 = (u64)addr + len; + info->condition_value = 0; + if (!len) + info->addr_mode = PPC_BREAKPOINT_MODE_EXACT; + else + info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE; } -static int test9(pid_t child_pid) +/* + * Checks if we can place at least 2 watchpoints on the child process + */ +static int check_watchpoints(pid_t pid) { - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread event by perf) - * if (existing thread event by ptrace on the other thread) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if 
(ptrace_fd < 0) - return -1; - - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } + struct ppc_debug_info dbginfo; - perf_fd = perf_thread_event_open(cpid, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - close(perf_fd); + FAIL_IF_MSG(ppc_ptrace_gethwdbginfo(pid, &dbginfo), "PPC_PTRACE_GETHWDBGINFO failed"); + SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)"); -kill_child: - kill(cpid, SIGINT); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test10(pid_t child_pid) +/* + * Wrapper around a plain fork() call that sets up the child for + * ptrace-ing. Both the parent and child return from this, though + * the child is stopped until ptrace_cont(pid) is run by the parent. + */ +static int ptrace_fork_child(pid_t *pid) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + int status; - perf_fd = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; - - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + *pid = fork(); -static int test11(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlap) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; + if (*pid < 0) + FAIL_IF_MSG(1, "Failed to fork child"); - perf_fd = perf_cpu_event_open(0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto 
ptrace_close; + if (!*pid) { + FAIL_IF_EXIT_MSG(ptrace_traceme(), "PTRACE_TRACEME failed"); + FAIL_IF_EXIT_MSG(raise(SIGSTOP), "Child failed to raise SIGSTOP"); + } else { + /* Synchronise on child SIGSTOP */ + FAIL_IF_MSG(waitpid(*pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); } - close(perf_fd); -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; + return 0; } -static int test12(pid_t child_pid) +/* + * Tests the interaction between ptrace and perf watching the same data. + * + * We expect ptrace to take 'priority', as it has before-execute + * semantics. + * + * The perf counter should not be incremented yet because perf has after-execute + * semantics. E.g., if ptrace changes the child PC, we don't even execute the + * instruction at all. + * + * When the child is stopped for ptrace, we test both continue and single step. + * Both should increment the perf counter. We also test changing the PC somewhere + * different and stepping, which should not increment the perf counter.
+ */ +int same_watch_addr_test(void) { - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range overlaps) - * fail; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; + struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */ + int bp_id; /* Breakpoint handle of ptrace watchpoint */ + int perf_fd; /* File descriptor of perf performance counter */ + u64 perf_count; /* Most recently fetched perf performance counter value */ + pid_t pid; /* PID of child process */ + void *pc; /* Most recently fetched child PC value */ + int status; /* Stop status of child after waitpid */ + unsigned long value; /* Dummy value to be read/written to by child */ + int err; + + err = ptrace_fork_child(&pid); + if (err) + return err; + + if (!pid) { + same_watch_addr_child(&value); + exit(1); + } - perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data1, sizeof(*perf_data1)); - if (perf_fd > 0 || errno != ENOSPC) - ret = -1; + err = check_watchpoints(pid); + if (err) + return err; - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + /* Place a perf watchpoint counter on value */ + perf_fd = perf_watchpoint_open(pid, &value, sizeof(value)); + FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter"); -static int test13(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the same thread) - * if (addr range does not overlap) - * allow; - */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data2, sizeof(*ptrace_data2)); - if (ptrace_fd < 0) - return -1; + /* Place a ptrace watchpoint on value */ + ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, &value, sizeof(value)); + bp_id = ppc_ptrace_sethwdbg(pid, 
&bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); - perf_fd = perf_thread_cpu_event_open(child_pid, 0, (__u64)perf_data2, sizeof(*perf_data2)); - if (perf_fd < 0) { - ret = -1; - goto ptrace_close; - } - close(perf_fd); + /* Let the child run. It should stop on the ptrace watchpoint */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); -ptrace_close: - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction"); -static int test14(pid_t child_pid) -{ - int perf_fd; - int ptrace_fd; - int cpid; - int ret = 0; - - /* Test: - * if (new per thread and per cpu event by perf) - * if (existing thread event by ptrace on the other thread) - * allow; + /* + * We stopped before executing the load, so perf should not have + * recorded any events yet */ - ptrace_fd = ptrace_open(child_pid, (__u64)ptrace_data1, sizeof(*ptrace_data1)); - if (ptrace_fd < 0) - return -1; - - cpid = fork(); - if (!cpid) { - /* Temporary Child */ - pause(); - exit(EXIT_SUCCESS); - } - - perf_fd = perf_thread_cpu_event_open(cpid, 0, (__u64)perf_data1, - sizeof(*perf_data1)); - if (perf_fd < 0) { - ret = -1; - goto kill_child; - } - close(perf_fd); - -kill_child: - kill(cpid, SIGINT); - ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, ptrace_fd); - return ret; -} + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 0, "perf recorded unexpected event"); + + /* Single stepping over the load should increment the perf counter */ + FAIL_IF_MSG(ptrace_singlestep(pid, 0), "Failed to single step child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is 
not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load + 4, "Failed to single step load instruction"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter did not increment"); + + /* + * Set up a ptrace watchpoint on the value again and trigger it. + * The perf counter should not have incremented because we do not + * execute the load yet. + */ + FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint"); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter should not have changed"); + + /* Continuing over the load should increment the perf counter */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 2, "perf counter did not increment"); + + /* + * If we set the child PC back to the load instruction, then continue, + * we should reach the end trap (because ptrace is one-shot) 
and have + * another perf event. + */ + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter did not increment"); + + /* + * If we set the child PC back to the load instruction, set a ptrace + * watchpoint on the load, then continue, we should immediately get + * the ptrace trap without incrementing the perf counter + */ + FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint"); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); + + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed"); + + /* + * If we change the PC while stopped on the load instruction, we should + * not increment the perf counter (because ptrace is before-execute, + * perf is after-execute). 
+ */ + FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load + 4), "Failed to set child PC"); + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); -static int do_test(const char *msg, int (*fun)(pid_t arg), pid_t arg) -{ - int ret; + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap"); + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed"); - ret = fun(arg); - if (ret) - printf("%s: Error\n", msg); - else - printf("%s: Ok\n", msg); - return ret; -} + /* Clean up child */ + FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child"); -char *desc[14] = { - "perf cpu event -> ptrace thread event (Overlapping)", - "perf cpu event -> ptrace thread event (Non-overlapping)", - "perf thread event -> ptrace same thread event (Overlapping)", - "perf thread event -> ptrace same thread event (Non-overlapping)", - "perf thread event -> ptrace other thread event", - "ptrace thread event -> perf kernel event", - "ptrace thread event -> perf same thread event (Overlapping)", - "ptrace thread event -> perf same thread event (Non-overlapping)", - "ptrace thread event -> perf other thread event", - "ptrace thread event -> perf cpu event (Overlapping)", - "ptrace thread event -> perf cpu event (Non-overlapping)", - "ptrace thread event -> perf same thread & cpu event (Overlapping)", - "ptrace thread event -> perf same thread & cpu event (Non-overlapping)", - "ptrace thread event -> perf other thread & cpu event", -}; - -static int test(pid_t child_pid) -{ - int ret = TEST_PASS; - - ret |= do_test(desc[0], test1, child_pid); - ret |= do_test(desc[1], test2, child_pid); - ret |= do_test(desc[2], test3, child_pid); - ret |= do_test(desc[3], test4, 
child_pid); - ret |= do_test(desc[4], test5, child_pid); - ret |= do_test(desc[5], test6, child_pid); - ret |= do_test(desc[6], test7, child_pid); - ret |= do_test(desc[7], test8, child_pid); - ret |= do_test(desc[8], test9, child_pid); - ret |= do_test(desc[9], test10, child_pid); - ret |= do_test(desc[10], test11, child_pid); - ret |= do_test(desc[11], test12, child_pid); - ret |= do_test(desc[12], test13, child_pid); - ret |= do_test(desc[13], test14, child_pid); - - return ret; + return 0; } -static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) +/* + * Tests the interaction between ptrace and perf when: + * 1. perf watches a value + * 2. ptrace watches a different value + * 3. The perf value is read, then the ptrace value is read immediately after + * + * A breakpoint implementation may accidentally misattribute/skip one of + * the ptrace or perf handlers, as interrupt based work is done after perf + * and before ptrace. + * + * We expect the perf counter to increment before the ptrace watchpoint + * triggers. 
+ */ +int perf_then_ptrace_test(void) { - if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) { - perror("Can't get breakpoint info"); - exit(-1); + struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */ + int bp_id; /* Breakpoint handle of ptrace watchpoint */ + int perf_fd; /* File descriptor of perf performance counter */ + u64 perf_count; /* Most recently fetched perf performance counter value */ + pid_t pid; /* PID of child process */ + void *pc; /* Most recently fetched child PC value */ + int status; /* Stop status of child after waitpid */ + unsigned long perf_value; /* Dummy value to be watched by perf */ + unsigned long ptrace_value; /* Dummy value to be watched by ptrace */ + int err; + + err = ptrace_fork_child(&pid); + if (err) + return err; + + /* + * If we are the child, run a subroutine that reads the perf value, + * then reads the ptrace value with consecutive load instructions + */ + if (!pid) { + perf_then_ptrace_child(&perf_value, &ptrace_value); + exit(0); } -} -static int ptrace_perf_hwbreak(void) -{ - int ret; - pid_t child_pid; - struct ppc_debug_info dbginfo; + err = check_watchpoints(pid); + if (err) + return err; - child_pid = fork(); - if (!child_pid) - return child(); + /* Place a perf watchpoint counter */ + perf_fd = perf_watchpoint_open(pid, &perf_value, sizeof(perf_value)); + FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter"); - /* parent */ - wait(NULL); /* <-- child (SIGUSR1) */ + /* Place a ptrace watchpoint */ + ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, + &ptrace_value, sizeof(ptrace_value)); + bp_id = ppc_ptrace_sethwdbg(pid, &bp_info); + FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint"); - get_dbginfo(child_pid, &dbginfo); - SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)"); + /* Let the child run. 
It should stop on the ptrace watchpoint */ + FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child"); - ret = perf_cpu_event_open(0, (__u64)perf_data1, sizeof(*perf_data1)); - SKIP_IF_MSG(ret < 0, "perf_event_open syscall failed"); - close(ret); + FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child"); + FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped"); + FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC"); + FAIL_IF_MSG(pc != perf_then_ptrace_load2, "Child did not stop on ptrace load"); - ret = test(child_pid); + /* perf should have recorded the first load */ + FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter"); + FAIL_IF_MSG(perf_count != 1, "perf counter did not increment"); - ptrace(PTRACE_CONT, child_pid, NULL, 0); - return ret; + /* Clean up child */ + FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child"); + + return 0; } int main(int argc, char *argv[]) { - return test_harness(ptrace_perf_hwbreak, "ptrace-perf-hwbreak"); + int err = 0; + + err |= test_harness(same_watch_addr_test, "same_watch_addr"); + err |= test_harness(perf_then_ptrace_test, "perf_then_ptrace"); + + return err; } From 53834a0c09252dea7918a9e1788bad880690900b Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 1 Aug 2023 11:17:44 +1000 Subject: [PATCH 067/135] perf/hw_breakpoint: Remove arch breakpoint hooks PowerPC was the only user of these hooks, and has been refactored to no longer require them. There is no need to keep them around, so remove them to reduce complexity. 
Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230801011744.153973-8-bgray@linux.ibm.com --- include/linux/hw_breakpoint.h | 3 --- kernel/events/hw_breakpoint.c | 28 ---------------------------- 2 files changed, 31 deletions(-) diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 7fbb459112734..db199d653dd1a 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -90,9 +90,6 @@ extern int dbg_reserve_bp_slot(struct perf_event *bp); extern int dbg_release_bp_slot(struct perf_event *bp); extern int reserve_bp_slot(struct perf_event *bp); extern void release_bp_slot(struct perf_event *bp); -int arch_reserve_bp_slot(struct perf_event *bp); -void arch_release_bp_slot(struct perf_event *bp); -void arch_unregister_hw_breakpoint(struct perf_event *bp); extern void flush_ptrace_hw_breakpoint(struct task_struct *tsk); diff --git a/kernel/events/hw_breakpoint.c b/kernel/events/hw_breakpoint.c index c3797701339cb..6c2cb4e4f48da 100644 --- a/kernel/events/hw_breakpoint.c +++ b/kernel/events/hw_breakpoint.c @@ -523,26 +523,6 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type, int we return 0; } -__weak int arch_reserve_bp_slot(struct perf_event *bp) -{ - return 0; -} - -__weak void arch_release_bp_slot(struct perf_event *bp) -{ -} - -/* - * Function to perform processor-specific cleanup during unregistration - */ -__weak void arch_unregister_hw_breakpoint(struct perf_event *bp) -{ - /* - * A weak stub function here for those archs that don't define - * it inside arch/.../kernel/hw_breakpoint.c - */ -} - /* * Constraints to check before allowing this new breakpoint counter. 
* @@ -594,7 +574,6 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type) enum bp_type_idx type; int max_pinned_slots; int weight; - int ret; /* We couldn't initialize breakpoint constraints on boot */ if (!constraints_initialized) @@ -613,10 +592,6 @@ static int __reserve_bp_slot(struct perf_event *bp, u64 bp_type) if (max_pinned_slots > hw_breakpoint_slots_cached(type)) return -ENOSPC; - ret = arch_reserve_bp_slot(bp); - if (ret) - return ret; - return toggle_bp_slot(bp, true, type, weight); } @@ -634,8 +609,6 @@ static void __release_bp_slot(struct perf_event *bp, u64 bp_type) enum bp_type_idx type; int weight; - arch_release_bp_slot(bp); - type = find_slot_idx(bp_type); weight = hw_breakpoint_weight(bp); WARN_ON(toggle_bp_slot(bp, false, type, weight)); @@ -645,7 +618,6 @@ void release_bp_slot(struct perf_event *bp) { struct mutex *mtx = bp_constraints_lock(bp); - arch_unregister_hw_breakpoint(bp); __release_bp_slot(bp, bp->attr.bp_type); bp_constraints_unlock(mtx); } From 0e216fa576e1cdea3913fe82b7283fdfb58c5c07 Mon Sep 17 00:00:00 2001 From: Benjamin Gray Date: Tue, 25 Jul 2023 10:58:38 +1000 Subject: [PATCH 068/135] Documentation/powerpc: Fix ptrace request names The documented ptrace request names are currently wrong/incomplete. Fix this to improve correctness and searchability. Signed-off-by: Benjamin Gray Signed-off-by: Michael Ellerman Link: https://msgid.link/20230725005841.28854-2-bgray@linux.ibm.com --- Documentation/powerpc/ptrace.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Documentation/powerpc/ptrace.rst b/Documentation/powerpc/ptrace.rst index 77725d69eb4a4..5629edf4d56ec 100644 --- a/Documentation/powerpc/ptrace.rst +++ b/Documentation/powerpc/ptrace.rst @@ -15,7 +15,7 @@ that's extendable and that covers both BookE and server processors, so that GDB doesn't need to special-case each of them. We added the following 3 new ptrace requests. -1. PTRACE_PPC_GETHWDEBUGINFO +1. 
PPC_PTRACE_GETHWDBGINFO ============================ Query for GDB to discover the hardware debug features. The main info to @@ -48,7 +48,7 @@ features will have bits indicating whether there is support for:: #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10 #define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20 -2. PTRACE_SETHWDEBUG +2. PPC_PTRACE_SETHWDEBUG Sets a hardware breakpoint or watchpoint, according to the provided structure:: @@ -88,7 +88,7 @@ that the BookE supports. COMEFROM breakpoints available in server processors are not contemplated, but that is out of the scope of this work. ptrace will return an integer (handle) uniquely identifying the breakpoint or -watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG +watchpoint just created. This integer will be used in the PPC_PTRACE_DELHWDEBUG request to ask for its removal. Return -ENOSPC if the requested breakpoint can't be allocated on the registers. @@ -150,7 +150,7 @@ Some examples of using the structure to: p.addr2 = (uint64_t) end_range; p.condition_value = 0; -3. PTRACE_DELHWDEBUG +3. PPC_PTRACE_DELHWDEBUG Takes an integer which identifies an existing breakpoint or watchpoint (i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the From 9a32584bc108c8fe4d02fa33b16caf686e4a788a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 22 Jun 2023 12:01:23 +0200 Subject: [PATCH 069/135] powerpc/ptrace: Split gpr32_set_common objtool reports the following warning: arch/powerpc/kernel/ptrace/ptrace-view.o: warning: objtool: gpr32_set_common+0x23c (.text+0x860): redundant UACCESS disable gpr32_set_common() conditionally opens and closes UACCESS based on whether kbuf pointer is NULL or not. This is fragile. Split gpr32_set_common() into two functions, one for user and one for kernel.
Signed-off-by: Christophe Leroy [mpe: Fix oops in gpr32_set_common_user() due to NULL kbuf] Signed-off-by: Michael Ellerman Link: https://msgid.link/b8d6ae4483fcfd17524e79d803c969694a85cc02.1687428075.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/ptrace/ptrace-view.c | 105 +++++++++++++++-------- 1 file changed, 67 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c index 3910cd7bb2d9b..584cf5c3df509 100644 --- a/arch/powerpc/kernel/ptrace/ptrace-view.c +++ b/arch/powerpc/kernel/ptrace/ptrace-view.c @@ -716,69 +716,86 @@ int gpr32_get_common(struct task_struct *target, return membuf_zero(&to, (ELF_NGREG - PT_REGS_COUNT) * sizeof(u32)); } -int gpr32_set_common(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf, - unsigned long *regs) +static int gpr32_set_common_kernel(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, unsigned long *regs) { const compat_ulong_t *k = kbuf; + + pos /= sizeof(compat_ulong_t); + count /= sizeof(compat_ulong_t); + + for (; count > 0 && pos < PT_MSR; --count) + regs[pos++] = *k++; + + if (count > 0 && pos == PT_MSR) { + set_user_msr(target, *k++); + ++pos; + --count; + } + + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) + regs[pos++] = *k++; + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + ++k; + + if (count > 0 && pos == PT_TRAP) { + set_user_trap(target, *k++); + ++pos; + --count; + } + + kbuf = k; + pos *= sizeof(compat_ulong_t); + count *= sizeof(compat_ulong_t); + user_regset_copyin_ignore(&pos, &count, &kbuf, NULL, + (PT_TRAP + 1) * sizeof(compat_ulong_t), -1); + return 0; +} + +static int gpr32_set_common_user(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void __user *ubuf, unsigned long *regs) +{ const 
compat_ulong_t __user *u = ubuf; + const void *kbuf = NULL; compat_ulong_t reg; - if (!kbuf && !user_read_access_begin(u, count)) + if (!user_read_access_begin(u, count)) return -EFAULT; pos /= sizeof(reg); count /= sizeof(reg); - if (kbuf) - for (; count > 0 && pos < PT_MSR; --count) - regs[pos++] = *k++; - else - for (; count > 0 && pos < PT_MSR; --count) { - unsafe_get_user(reg, u++, Efault); - regs[pos++] = reg; - } - + for (; count > 0 && pos < PT_MSR; --count) { + unsafe_get_user(reg, u++, Efault); + regs[pos++] = reg; + } if (count > 0 && pos == PT_MSR) { - if (kbuf) - reg = *k++; - else - unsafe_get_user(reg, u++, Efault); + unsafe_get_user(reg, u++, Efault); set_user_msr(target, reg); ++pos; --count; } - if (kbuf) { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) - regs[pos++] = *k++; - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - ++k; - } else { - for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { - unsafe_get_user(reg, u++, Efault); - regs[pos++] = reg; - } - for (; count > 0 && pos < PT_TRAP; --count, ++pos) - unsafe_get_user(reg, u++, Efault); + for (; count > 0 && pos <= PT_MAX_PUT_REG; --count) { + unsafe_get_user(reg, u++, Efault); + regs[pos++] = reg; } + for (; count > 0 && pos < PT_TRAP; --count, ++pos) + unsafe_get_user(reg, u++, Efault); if (count > 0 && pos == PT_TRAP) { - if (kbuf) - reg = *k++; - else - unsafe_get_user(reg, u++, Efault); + unsafe_get_user(reg, u++, Efault); set_user_trap(target, reg); ++pos; --count; } - if (!kbuf) - user_read_access_end(); + user_read_access_end(); - kbuf = k; ubuf = u; pos *= sizeof(reg); count *= sizeof(reg); @@ -791,6 +808,18 @@ int gpr32_set_common(struct task_struct *target, return -EFAULT; } +int gpr32_set_common(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf, + unsigned long *regs) +{ + if (kbuf) + return gpr32_set_common_kernel(target, regset, pos, count, kbuf, regs); + else + return 
gpr32_set_common_user(target, regset, pos, count, ubuf, regs); +} + static int gpr32_get(struct task_struct *target, const struct user_regset *regset, struct membuf to) From f94a84a0914841d79a38df7cc75512d88b31a0dc Mon Sep 17 00:00:00 2001 From: Justin Stitt Date: Wed, 16 Aug 2023 21:39:24 +0000 Subject: [PATCH 070/135] powerpc/ps3: refactor strncpy usage `strncpy` is deprecated for use on NUL-terminated destination strings [1]. `make_first_field()` should use similar implementation to `make_field()` due to memcpy having more obvious behavior here. The end result yields the same behavior as the previous `strncpy`-based implementation including the NUL-padding. Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strncpy-on-nul-terminated-strings [1] Link: https://github.com/KSPP/linux/issues/90 Signed-off-by: Justin Stitt Reviewed-by: Kees Cook Tested-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://msgid.link/20230816-strncpy-arch-powerpc-platforms-ps3-repository-v1-1-88283b02fb09@google.com --- arch/powerpc/platforms/ps3/repository.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c index 205763061a2d4..1abe33fbe5290 100644 --- a/arch/powerpc/platforms/ps3/repository.c +++ b/arch/powerpc/platforms/ps3/repository.c @@ -73,9 +73,9 @@ static void _dump_node(unsigned int lpar_id, u64 n1, u64 n2, u64 n3, u64 n4, static u64 make_first_field(const char *text, u64 index) { - u64 n; + u64 n = 0; - strncpy((char *)&n, text, 8); + memcpy((char *)&n, text, strnlen(text, sizeof(n))); return PS3_VENDOR_ID_NONE + (n >> 32) + index; } From be922070d0914c6642256ceec6b7be75c0a5ddf3 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Aug 2023 08:51:48 +0200 Subject: [PATCH 071/135] powerpc/512x: Make mpc512x_select_reset_compat() static mpc512x_select_reset_compat() is only used in the file it is defined. Make it static. 
Move mpc512x_restart_init() after mpc512x_select_reset_compat(). Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/36a19e13025dbf17e92e832dd24150642b0e9bad.1692341499.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/512x/mpc512x.h | 1 - arch/powerpc/platforms/512x/mpc512x_shared.c | 30 ++++++++++---------- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/platforms/512x/mpc512x.h b/arch/powerpc/platforms/512x/mpc512x.h index 2f3c60e373e1a..d2cb06e3a4360 100644 --- a/arch/powerpc/platforms/512x/mpc512x.h +++ b/arch/powerpc/platforms/512x/mpc512x.h @@ -13,7 +13,6 @@ extern void __init mpc512x_init(void); extern void __init mpc512x_setup_arch(void); extern int __init mpc5121_clk_init(void); const char *__init mpc512x_select_psc_compat(void); -const char *__init mpc512x_select_reset_compat(void); extern void __noreturn mpc512x_restart(char *cmd); #endif /* __MPC512X_H__ */ diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c index 5ac0ead2540fc..8f75e9574c27a 100644 --- a/arch/powerpc/platforms/512x/mpc512x_shared.c +++ b/arch/powerpc/platforms/512x/mpc512x_shared.c @@ -29,20 +29,6 @@ static struct mpc512x_reset_module __iomem *reset_module_base; -static void __init mpc512x_restart_init(void) -{ - struct device_node *np; - const char *reset_compat; - - reset_compat = mpc512x_select_reset_compat(); - np = of_find_compatible_node(NULL, NULL, reset_compat); - if (!np) - return; - - reset_module_base = of_iomap(np, 0); - of_node_put(np); -} - void __noreturn mpc512x_restart(char *cmd) { if (reset_module_base) { @@ -363,7 +349,7 @@ const char *__init mpc512x_select_psc_compat(void) return NULL; } -const char *__init mpc512x_select_reset_compat(void) +static const char *__init mpc512x_select_reset_compat(void) { if (of_machine_is_compatible("fsl,mpc5121")) return "fsl,mpc5121-reset"; @@ -455,6 +441,20 @@ static void __init 
mpc512x_psc_fifo_init(void) } } +static void __init mpc512x_restart_init(void) +{ + struct device_node *np; + const char *reset_compat; + + reset_compat = mpc512x_select_reset_compat(); + np = of_find_compatible_node(NULL, NULL, reset_compat); + if (!np) + return; + + reset_module_base = of_iomap(np, 0); + of_node_put(np); +} + void __init mpc512x_init_early(void) { mpc512x_restart_init(); From 6960c53c4c32865a7e79022e626aa82b64da4110 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 16 Aug 2023 17:19:54 +0200 Subject: [PATCH 072/135] powerpc/fsl_pci: Make fsl_add_bridge() static Since commit 905e75c46dba ("powerpc/fsl-pci: Unify pci/pcie initialization code") fsl_add_bridge() is not used anymore outside of fsl_pci.c Make it static. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/2115e3597d81e72a865820af54f0e290d0fd2b3a.1692199186.git.christophe.leroy@csgroup.eu --- arch/powerpc/sysdev/fsl_pci.c | 2 +- arch/powerpc/sysdev/fsl_pci.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 6daf620b63a4d..5f7219df35ef0 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -519,7 +519,7 @@ void fsl_pcibios_fixup_bus(struct pci_bus *bus) } } -int fsl_add_bridge(struct platform_device *pdev, int is_primary) +static int fsl_add_bridge(struct platform_device *pdev, int is_primary) { int len; struct pci_controller *hose; diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h index 093a875d7d1ec..3bc4ab9d83414 100644 --- a/arch/powerpc/sysdev/fsl_pci.h +++ b/arch/powerpc/sysdev/fsl_pci.h @@ -112,7 +112,6 @@ struct ccsr_pci { }; -extern int fsl_add_bridge(struct platform_device *pdev, int is_primary); extern void fsl_pcibios_fixup_bus(struct pci_bus *bus); extern void fsl_pcibios_fixup_phb(struct pci_controller *phb); extern int mpc83xx_add_bridge(struct device_node *dev); From 
d25f01fba71dbaa117021aed3ea85cc5476c1a5a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 16 Aug 2023 17:22:16 +0200 Subject: [PATCH 073/135] powerpc/83xx: Fix style problems in usb.c and remove unnecessary includes from mpc83xx.h Replace printk(KERN_WARN with pr_warn( Remove a couple of blank lines Re-align multi-line code. Replace asm/io.h by linux/io.h mpc83xx.h doesn't need linux/device.h or asm/pci-bridge.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/2cb498f637e082a4af8032311fad3cae84d6aa5d.1692199324.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/83xx/mpc83xx.h | 2 -- arch/powerpc/platforms/83xx/usb.c | 35 ++++++++++++--------------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/platforms/83xx/mpc83xx.h b/arch/powerpc/platforms/83xx/mpc83xx.h index aea803ba3a152..0b8738a2b980e 100644 --- a/arch/powerpc/platforms/83xx/mpc83xx.h +++ b/arch/powerpc/platforms/83xx/mpc83xx.h @@ -3,8 +3,6 @@ #define __MPC83XX_H__ #include -#include -#include /* System Clock Control Register */ #define MPC83XX_SCCR_OFFS 0xA08 diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb.c index e2a13a052f961..d5ad6cff9bd87 100644 --- a/arch/powerpc/platforms/83xx/usb.c +++ b/arch/powerpc/platforms/83xx/usb.c @@ -6,19 +6,17 @@ * Author: Li Yang */ - #include #include #include #include #include +#include -#include #include #include "mpc83xx.h" - #ifdef CONFIG_PPC_MPC834x int __init mpc834x_usb_cfg(void) { @@ -44,8 +42,8 @@ int __init mpc834x_usb_cfg(void) prop = of_get_property(np, "phy_type", NULL); port1_is_dr = 1; - if (prop && (!strcmp(prop, "utmi") || - !strcmp(prop, "utmi_wide"))) { + if (prop && + (!strcmp(prop, "utmi") || !strcmp(prop, "utmi_wide"))) { sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1; sicrh |= MPC834X_SICRH_USB_UTMI; port0_is_dr = 1; @@ -60,7 +58,7 @@ int __init mpc834x_usb_cfg(void) } else if (prop && !strcmp(prop, "ulpi")) { sicrl |=
MPC834X_SICRL_USB1; } else { - printk(KERN_WARNING "834x USB PHY type not supported\n"); + pr_warn("834x USB PHY type not supported\n"); } of_node_put(np); } @@ -71,15 +69,13 @@ int __init mpc834x_usb_cfg(void) prop = of_get_property(np, "port0", NULL); if (prop) { if (port0_is_dr) - printk(KERN_WARNING - "834x USB port0 can't be used by both DR and MPH!\n"); + pr_warn("834x USB port0 can't be used by both DR and MPH!\n"); sicrl &= ~MPC834X_SICRL_USB0; } prop = of_get_property(np, "port1", NULL); if (prop) { if (port1_is_dr) - printk(KERN_WARNING - "834x USB port1 can't be used by both DR and MPH!\n"); + pr_warn("834x USB port1 can't be used by both DR and MPH!\n"); sicrl &= ~MPC834X_SICRL_USB1; } of_node_put(np); @@ -124,14 +120,14 @@ int __init mpc831x_usb_cfg(void) /* Configure clock */ immr_node = of_get_parent(np); if (immr_node && (of_device_is_compatible(immr_node, "fsl,mpc8315-immr") || - of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))) + of_device_is_compatible(immr_node, "fsl,mpc8308-immr"))) clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, - MPC8315_SCCR_USB_MASK, - MPC8315_SCCR_USB_DRCM_01); + MPC8315_SCCR_USB_MASK, + MPC8315_SCCR_USB_DRCM_01); else clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, - MPC83XX_SCCR_USB_MASK, - MPC83XX_SCCR_USB_DRCM_11); + MPC83XX_SCCR_USB_MASK, + MPC83XX_SCCR_USB_DRCM_11); /* Configure pin mux for ULPI. 
There is no pin mux for UTMI */ if (prop && !strcmp(prop, "ulpi")) { @@ -169,8 +165,7 @@ int __init mpc831x_usb_cfg(void) usb_regs = ioremap(res.start, resource_size(&res)); /* Using on-chip PHY */ - if (prop && (!strcmp(prop, "utmi_wide") || - !strcmp(prop, "utmi"))) { + if (prop && (!strcmp(prop, "utmi_wide") || !strcmp(prop, "utmi"))) { u32 refsel; if (of_device_is_compatible(immr_node, "fsl,mpc8308-immr")) @@ -182,7 +177,7 @@ int __init mpc831x_usb_cfg(void) refsel = CONTROL_REFSEL_48MHZ; /* Set UTMI_PHY_EN and REFSEL */ out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, - CONTROL_UTMI_PHY_EN | refsel); + CONTROL_UTMI_PHY_EN | refsel); /* Using external UPLI PHY */ } else if (prop && !strcmp(prop, "ulpi")) { /* Set PHY_CLK_SEL to ULPI */ @@ -197,7 +192,7 @@ int __init mpc831x_usb_cfg(void) #endif /* CONFIG_USB_OTG */ out_be32(usb_regs + FSL_USB2_CONTROL_OFFS, temp); } else { - printk(KERN_WARNING "831x USB PHY type not supported\n"); + pr_warn("831x USB PHY type not supported\n"); ret = -EINVAL; } @@ -224,7 +219,7 @@ int __init mpc837x_usb_cfg(void) prop = of_get_property(np, "phy_type", NULL); if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) { - printk(KERN_WARNING "837x USB PHY type not supported\n"); + pr_warn("837x USB PHY type not supported\n"); of_node_put(np); return -EINVAL; } From 5951b62ba4635881f1eb6c30e35d476871ff5d12 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 16 Aug 2023 17:22:17 +0200 Subject: [PATCH 074/135] powerpc/83xx: Split usb.c usb.c contains three independent parts with no common part. Split it. 
Signed-off-by: Christophe Leroy [mpe: Drop usb.o from Makefile to fix build] Signed-off-by: Michael Ellerman Link: https://msgid.link/75712b54bf9cb85ab10e47cd2772cd2a098ca895.1692199324.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/83xx/Makefile | 5 +- .../platforms/83xx/{usb.c => usb_831x.c} | 118 ------------------ arch/powerpc/platforms/83xx/usb_834x.c | 90 +++++++++++++ arch/powerpc/platforms/83xx/usb_837x.c | 58 +++++++++ 4 files changed, 152 insertions(+), 119 deletions(-) rename arch/powerpc/platforms/83xx/{usb.c => usb_831x.c} (51%) create mode 100644 arch/powerpc/platforms/83xx/usb_834x.c create mode 100644 arch/powerpc/platforms/83xx/usb_837x.c diff --git a/arch/powerpc/platforms/83xx/Makefile b/arch/powerpc/platforms/83xx/Makefile index 6b4013e01b3b6..6fc3dba943dad 100644 --- a/arch/powerpc/platforms/83xx/Makefile +++ b/arch/powerpc/platforms/83xx/Makefile @@ -2,7 +2,7 @@ # # Makefile for the PowerPC 83xx linux kernel. # -obj-y := misc.o usb.o +obj-y := misc.o obj-$(CONFIG_SUSPEND) += suspend.o suspend-asm.o obj-$(CONFIG_MCU_MPC8349EMITX) += mcu_mpc8349emitx.o obj-$(CONFIG_MPC830x_RDB) += mpc830x_rdb.o @@ -13,3 +13,6 @@ obj-$(CONFIG_MPC836x_RDK) += mpc836x_rdk.o obj-$(CONFIG_MPC837x_RDB) += mpc837x_rdb.o obj-$(CONFIG_ASP834x) += asp834x.o obj-$(CONFIG_KMETER1) += km83xx.o +obj-$(CONFIG_PPC_MPC831x) += usb_831x.o +obj-$(CONFIG_PPC_MPC834x) += usb_834x.o +obj-$(CONFIG_PPC_MPC837x) += usb_837x.o diff --git a/arch/powerpc/platforms/83xx/usb.c b/arch/powerpc/platforms/83xx/usb_831x.c similarity index 51% rename from arch/powerpc/platforms/83xx/usb.c rename to arch/powerpc/platforms/83xx/usb_831x.c index d5ad6cff9bd87..28c24e90f0223 100644 --- a/arch/powerpc/platforms/83xx/usb.c +++ b/arch/powerpc/platforms/83xx/usb_831x.c @@ -17,81 +17,6 @@ #include "mpc83xx.h" -#ifdef CONFIG_PPC_MPC834x -int __init mpc834x_usb_cfg(void) -{ - unsigned long sccr, sicrl, sicrh; - void __iomem *immap; - struct device_node *np = NULL; - int port0_is_dr = 0, 
port1_is_dr = 0; - const void *prop, *dr_mode; - - immap = ioremap(get_immrbase(), 0x1000); - if (!immap) - return -ENOMEM; - - /* Read registers */ - /* Note: DR and MPH must use the same clock setting in SCCR */ - sccr = in_be32(immap + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK; - sicrl = in_be32(immap + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK; - sicrh = in_be32(immap + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI; - - np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); - if (np) { - sccr |= MPC83XX_SCCR_USB_DRCM_11; /* 1:3 */ - - prop = of_get_property(np, "phy_type", NULL); - port1_is_dr = 1; - if (prop && - (!strcmp(prop, "utmi") || !strcmp(prop, "utmi_wide"))) { - sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1; - sicrh |= MPC834X_SICRH_USB_UTMI; - port0_is_dr = 1; - } else if (prop && !strcmp(prop, "serial")) { - dr_mode = of_get_property(np, "dr_mode", NULL); - if (dr_mode && !strcmp(dr_mode, "otg")) { - sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1; - port0_is_dr = 1; - } else { - sicrl |= MPC834X_SICRL_USB1; - } - } else if (prop && !strcmp(prop, "ulpi")) { - sicrl |= MPC834X_SICRL_USB1; - } else { - pr_warn("834x USB PHY type not supported\n"); - } - of_node_put(np); - } - np = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph"); - if (np) { - sccr |= MPC83XX_SCCR_USB_MPHCM_11; /* 1:3 */ - - prop = of_get_property(np, "port0", NULL); - if (prop) { - if (port0_is_dr) - pr_warn("834x USB port0 can't be used by both DR and MPH!\n"); - sicrl &= ~MPC834X_SICRL_USB0; - } - prop = of_get_property(np, "port1", NULL); - if (prop) { - if (port1_is_dr) - pr_warn("834x USB port1 can't be used by both DR and MPH!\n"); - sicrl &= ~MPC834X_SICRL_USB1; - } - of_node_put(np); - } - - /* Write back */ - out_be32(immap + MPC83XX_SCCR_OFFS, sccr); - out_be32(immap + MPC83XX_SICRL_OFFS, sicrl); - out_be32(immap + MPC83XX_SICRH_OFFS, sicrh); - - iounmap(immap); - return 0; -} -#endif /* CONFIG_PPC_MPC834x */ - -#ifdef CONFIG_PPC_MPC831x int __init 
mpc831x_usb_cfg(void) { u32 temp; @@ -201,46 +126,3 @@ int __init mpc831x_usb_cfg(void) of_node_put(np); return ret; } -#endif /* CONFIG_PPC_MPC831x */ - -#ifdef CONFIG_PPC_MPC837x -int __init mpc837x_usb_cfg(void) -{ - void __iomem *immap; - struct device_node *np = NULL; - const void *prop; - int ret = 0; - - np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); - if (!np || !of_device_is_available(np)) { - of_node_put(np); - return -ENODEV; - } - prop = of_get_property(np, "phy_type", NULL); - - if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) { - pr_warn("837x USB PHY type not supported\n"); - of_node_put(np); - return -EINVAL; - } - - /* Map IMMR space for pin and clock settings */ - immap = ioremap(get_immrbase(), 0x1000); - if (!immap) { - of_node_put(np); - return -ENOMEM; - } - - /* Configure clock */ - clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11, - MPC837X_SCCR_USB_DRCM_11); - - /* Configure pin mux for ULPI/serial */ - clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK, - MPC837X_SICRL_USB_ULPI); - - iounmap(immap); - of_node_put(np); - return ret; -} -#endif /* CONFIG_PPC_MPC837x */ diff --git a/arch/powerpc/platforms/83xx/usb_834x.c b/arch/powerpc/platforms/83xx/usb_834x.c new file mode 100644 index 0000000000000..3a8d6c662d06b --- /dev/null +++ b/arch/powerpc/platforms/83xx/usb_834x.c @@ -0,0 +1,90 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Freescale 83xx USB SOC setup code + * + * Copyright (C) 2007 Freescale Semiconductor, Inc. 
+ * Author: Li Yang + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "mpc83xx.h" + +int __init mpc834x_usb_cfg(void) +{ + unsigned long sccr, sicrl, sicrh; + void __iomem *immap; + struct device_node *np = NULL; + int port0_is_dr = 0, port1_is_dr = 0; + const void *prop, *dr_mode; + + immap = ioremap(get_immrbase(), 0x1000); + if (!immap) + return -ENOMEM; + + /* Read registers */ + /* Note: DR and MPH must use the same clock setting in SCCR */ + sccr = in_be32(immap + MPC83XX_SCCR_OFFS) & ~MPC83XX_SCCR_USB_MASK; + sicrl = in_be32(immap + MPC83XX_SICRL_OFFS) & ~MPC834X_SICRL_USB_MASK; + sicrh = in_be32(immap + MPC83XX_SICRH_OFFS) & ~MPC834X_SICRH_USB_UTMI; + + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); + if (np) { + sccr |= MPC83XX_SCCR_USB_DRCM_11; /* 1:3 */ + + prop = of_get_property(np, "phy_type", NULL); + port1_is_dr = 1; + if (prop && + (!strcmp(prop, "utmi") || !strcmp(prop, "utmi_wide"))) { + sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1; + sicrh |= MPC834X_SICRH_USB_UTMI; + port0_is_dr = 1; + } else if (prop && !strcmp(prop, "serial")) { + dr_mode = of_get_property(np, "dr_mode", NULL); + if (dr_mode && !strcmp(dr_mode, "otg")) { + sicrl |= MPC834X_SICRL_USB0 | MPC834X_SICRL_USB1; + port0_is_dr = 1; + } else { + sicrl |= MPC834X_SICRL_USB1; + } + } else if (prop && !strcmp(prop, "ulpi")) { + sicrl |= MPC834X_SICRL_USB1; + } else { + pr_warn("834x USB PHY type not supported\n"); + } + of_node_put(np); + } + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-mph"); + if (np) { + sccr |= MPC83XX_SCCR_USB_MPHCM_11; /* 1:3 */ + + prop = of_get_property(np, "port0", NULL); + if (prop) { + if (port0_is_dr) + pr_warn("834x USB port0 can't be used by both DR and MPH!\n"); + sicrl &= ~MPC834X_SICRL_USB0; + } + prop = of_get_property(np, "port1", NULL); + if (prop) { + if (port1_is_dr) + pr_warn("834x USB port1 can't be used by both DR and MPH!\n"); + sicrl &= ~MPC834X_SICRL_USB1; + } + of_node_put(np); + } + 
+ /* Write back */ + out_be32(immap + MPC83XX_SCCR_OFFS, sccr); + out_be32(immap + MPC83XX_SICRL_OFFS, sicrl); + out_be32(immap + MPC83XX_SICRH_OFFS, sicrh); + + iounmap(immap); + return 0; +} diff --git a/arch/powerpc/platforms/83xx/usb_837x.c b/arch/powerpc/platforms/83xx/usb_837x.c new file mode 100644 index 0000000000000..726935bb6e2d5 --- /dev/null +++ b/arch/powerpc/platforms/83xx/usb_837x.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Freescale 83xx USB SOC setup code + * + * Copyright (C) 2007 Freescale Semiconductor, Inc. + * Author: Li Yang + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "mpc83xx.h" + +int __init mpc837x_usb_cfg(void) +{ + void __iomem *immap; + struct device_node *np = NULL; + const void *prop; + int ret = 0; + + np = of_find_compatible_node(NULL, NULL, "fsl-usb2-dr"); + if (!np || !of_device_is_available(np)) { + of_node_put(np); + return -ENODEV; + } + prop = of_get_property(np, "phy_type", NULL); + + if (!prop || (strcmp(prop, "ulpi") && strcmp(prop, "serial"))) { + pr_warn("837x USB PHY type not supported\n"); + of_node_put(np); + return -EINVAL; + } + + /* Map IMMR space for pin and clock settings */ + immap = ioremap(get_immrbase(), 0x1000); + if (!immap) { + of_node_put(np); + return -ENOMEM; + } + + /* Configure clock */ + clrsetbits_be32(immap + MPC83XX_SCCR_OFFS, MPC837X_SCCR_USB_DRCM_11, + MPC837X_SCCR_USB_DRCM_11); + + /* Configure pin mux for ULPI/serial */ + clrsetbits_be32(immap + MPC83XX_SICRL_OFFS, MPC837X_SICRL_USB_MASK, + MPC837X_SICRL_USB_ULPI); + + iounmap(immap); + of_node_put(np); + return ret; +} From 188da8af0a06b985c2e0f00ec04bd12dbbf870eb Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Aug 2023 10:05:08 +0200 Subject: [PATCH 075/135] powerpc/82xx: Remove pq2_init_pci Commit 859b21a008eb ("powerpc: drop PowerQUICC II Family ADS platform support") removed last user of pq2_init_pci. Remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/8b2db7c3c2c346aa8aa49507415c360d441e5bf5.1692259498.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/82xx/pq2.c | 46 ------------------------------- 1 file changed, 46 deletions(-) diff --git a/arch/powerpc/platforms/82xx/pq2.c b/arch/powerpc/platforms/82xx/pq2.c index 3b5cb39a564c8..391d72a2e09da 100644 --- a/arch/powerpc/platforms/82xx/pq2.c +++ b/arch/powerpc/platforms/82xx/pq2.c @@ -32,49 +32,3 @@ void __noreturn pq2_restart(char *cmd) panic("Restart failed\n"); } NOKPROBE_SYMBOL(pq2_restart) - -#ifdef CONFIG_PCI -static int pq2_pci_exclude_device(struct pci_controller *hose, - u_char bus, u8 devfn) -{ - if (bus == 0 && PCI_SLOT(devfn) == 0) - return PCIBIOS_DEVICE_NOT_FOUND; - else - return PCIBIOS_SUCCESSFUL; -} - -static void __init pq2_pci_add_bridge(struct device_node *np) -{ - struct pci_controller *hose; - struct resource r; - - if (of_address_to_resource(np, 0, &r) || r.end - r.start < 0x10b) - goto err; - - pci_add_flags(PCI_REASSIGN_ALL_BUS); - - hose = pcibios_alloc_controller(np); - if (!hose) - return; - - hose->dn = np; - - setup_indirect_pci(hose, r.start + 0x100, r.start + 0x104, 0); - pci_process_bridge_OF_ranges(hose, np, 1); - - return; - -err: - printk(KERN_ERR "No valid PCI reg property in device tree\n"); -} - -void __init pq2_init_pci(void) -{ - struct device_node *np; - - ppc_md.pci_exclude_device = pq2_pci_exclude_device; - - for_each_compatible_node(np, NULL, "fsl,pq2-pci") - pq2_pci_add_bridge(np); -} -#endif From eb5aa2137275da82052586f9bd405a1358b48139 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Aug 2023 10:05:49 +0200 Subject: [PATCH 076/135] powerpc/82xx: Remove CONFIG_8260 and CONFIG_8272 CONFIG_8272 is never used, remove it. CONFIG_8260 is redundant with CONFIG_PPC_82xx, remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/80930252a5167f3cdaa7eb694074d75521a0bdf9.1692259495.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/cputable.h | 2 +- arch/powerpc/platforms/82xx/Kconfig | 25 ++++--------------------- arch/powerpc/platforms/Kconfig | 2 +- 3 files changed, 6 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 443a9d482b152..8765d5158324e 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -252,7 +252,7 @@ static inline void cpu_feature_keys_init(void) { } * This is also required by 52xx family. */ #if defined(CONFIG_SMP) || defined(CONFIG_MPC10X_BRIDGE) \ - || defined(CONFIG_PPC_83xx) || defined(CONFIG_8260) \ + || defined(CONFIG_PPC_83xx) || defined(CONFIG_PPC_82xx) \ || defined(CONFIG_PPC_MPC52xx) #define CPU_FTR_COMMON CPU_FTR_NEED_COHERENT #else diff --git a/arch/powerpc/platforms/82xx/Kconfig b/arch/powerpc/platforms/82xx/Kconfig index 58e58b4f6a12e..d9f1a2a83158a 100644 --- a/arch/powerpc/platforms/82xx/Kconfig +++ b/arch/powerpc/platforms/82xx/Kconfig @@ -7,8 +7,8 @@ if PPC_82xx config EP8248E bool "Embedded Planet EP8248E (a.k.a. CWH-PPC-8248N-VE)" - select 8272 - select 8260 + select CPM2 + select PPC_INDIRECT_PCI if PCI select FSL_SOC select PHYLIB if NETDEVICES select MDIO_BITBANG if PHYLIB @@ -20,27 +20,10 @@ config EP8248E config MGCOGE bool "Keymile MGCOGE" - select 8272 - select 8260 + select CPM2 + select PPC_INDIRECT_PCI if PCI select FSL_SOC help This enables support for the Keymile MGCOGE board. endif - -config 8260 - bool - depends on PPC_BOOK3S_32 - select CPM2 - select PPC_INDIRECT_PCI if PCI - help - The MPC8260 is a typical embedded CPU made by Freescale. Selecting - this option means that you wish to build a kernel for a machine with - an 8260 class CPU. 
- -config 8272 - bool - select 8260 - help - The MPC8272 CPM has a different internal dpram setup than other CPM2 - devices diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 3e2e252016f7a..1fd253f92a774 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -251,7 +251,7 @@ config QE_GPIO config CPM2 bool "Enable support for the CPM2 (Communications Processor Module)" - depends on (FSL_SOC_BOOKE && PPC32) || 8260 + depends on (FSL_SOC_BOOKE && PPC32) || PPC_82xx select CPM select HAVE_PCI select GPIOLIB From 4531f128eac356144545b833908b6ffbc0292e16 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Aug 2023 14:26:45 +0200 Subject: [PATCH 077/135] powerpc/8xx: Remove init_internal_rtc() to fix no previous prototype error A W=1 build of mpc885_ads_defconfig throws the following error: CC arch/powerpc/platforms/8xx/m8xx_setup.o arch/powerpc/platforms/8xx/m8xx_setup.c:41:1: error: no previous prototype for 'init_internal_rtc' [-Werror=missing-prototypes] 41 | init_internal_rtc(void) | ^~~~~~~~~~~~~~~~~ init_internal_rtc() was introduced by commit df34403dcaac ("[POWERPC] 8xx: Add mpc885ads support and common mpc8xx files") as a weak function but has never been defined and/or used outside m8xx_setup.c As it is called only once there, just fold it into its caller and remove it. 
Signed-off-by: Christophe Leroy Reviewed-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://msgid.link/0aa1141e18a84d926e199093204b37ec993f0c87.1692275185.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/8xx/m8xx_setup.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/platforms/8xx/m8xx_setup.c b/arch/powerpc/platforms/8xx/m8xx_setup.c index 3c5c4e08b6a99..2336b687bc96a 100644 --- a/arch/powerpc/platforms/8xx/m8xx_setup.c +++ b/arch/powerpc/platforms/8xx/m8xx_setup.c @@ -36,17 +36,6 @@ static irqreturn_t timebase_interrupt(int irq, void *dev) return IRQ_HANDLED; } -/* per-board overridable init_internal_rtc() function. */ -void __init __attribute__ ((weak)) -init_internal_rtc(void) -{ - /* Disable the RTC one second and alarm interrupts. */ - clrbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_SIE | RTCSC_ALE)); - - /* Enable the RTC */ - setbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_RTF | RTCSC_RTE)); -} - static int __init get_freq(char *name, unsigned long *val) { struct device_node *cpu; @@ -117,7 +106,11 @@ void __init mpc8xx_calibrate_decr(void) out_be32(&mpc8xx_immr->im_sitk.sitk_rtcsck, KAPWR_KEY); out_be32(&mpc8xx_immr->im_sitk.sitk_tbk, KAPWR_KEY); - init_internal_rtc(); + /* Disable the RTC one second and alarm interrupts. */ + clrbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_SIE | RTCSC_ALE)); + + /* Enable the RTC */ + setbits16(&mpc8xx_immr->im_sit.sit_rtcsc, (RTCSC_RTF | RTCSC_RTE)); /* Enabling the decrementer also enables the timebase interrupts * (or from the other point of view, to get decrementer interrupts From 7cb0094be4a5dfb3c91d285977f489d334455e19 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 15 Aug 2023 19:42:40 +0200 Subject: [PATCH 078/135] powerpc/32s: Cleanup the mess in __set_pte_at() __set_pte_at() handles 3 main cases with #ifdefs plus the 'percpu' subcase which leads to code duplication. 
Rewrite the function using IS_ENABLED() to minimise the total number of cases and remove duplicated code. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/2322dd08217bccab25456fe8b189edf0e6a8b6dd.1692121353.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/32/pgtable.h | 77 ++++++++------------ 1 file changed, 31 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 7bf1fe7297c63..d49c2a9d4ffe1 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -541,58 +541,43 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) /* This low level function performs the actual PTE insertion - * Setting the PTE depends on the MMU type and other factors. It's - * an horrible mess that I'm not going to try to clean up now but - * I'm keeping it in one place rather than spread around + * Setting the PTE depends on the MMU type and other factors. + * + * First case is 32-bit in UP mode with 32-bit PTEs, we need to preserve + * the _PAGE_HASHPTE bit since we may not have invalidated the previous + * translation in the hash yet (done in a subsequent flush_tlb_xxx()) + * and see we need to keep track that this PTE needs invalidating. + * + * Second case is 32-bit with 64-bit PTE. In this case, we + * can just store as long as we do the two halves in the right order + * with a barrier in between. This is possible because we take care, + * in the hash code, to pre-invalidate if the PTE was already hashed, + * which synchronizes us with any concurrent invalidation. + * In the percpu case, we fallback to the simple update preserving + * the hash bits (ie, same as the non-SMP case). + * + * Third case is 32-bit in SMP mode with 32-bit PTEs. We use the + * helper pte_update() which does an atomic update. We need to do that + * because a concurrent invalidation can clear _PAGE_HASHPTE. 
If it's a + * per-CPU PTE such as a kmap_atomic, we also do a simple update preserving + * the hash bits instead. */ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte, int percpu) { -#if defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT) - /* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use the - * helper pte_update() which does an atomic update. We need to do that - * because a concurrent invalidation can clear _PAGE_HASHPTE. If it's a - * per-CPU PTE such as a kmap_atomic, we do a simple update preserving - * the hash bits instead (ie, same as the non-SMP case) - */ - if (percpu) - *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) - | (pte_val(pte) & ~_PAGE_HASHPTE)); - else - pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0); + if ((!IS_ENABLED(CONFIG_SMP) && !IS_ENABLED(CONFIG_PTE_64BIT)) || percpu) { + *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) | + (pte_val(pte) & ~_PAGE_HASHPTE)); + } else if (IS_ENABLED(CONFIG_PTE_64BIT)) { + if (pte_val(*ptep) & _PAGE_HASHPTE) + flush_hash_entry(mm, ptep, addr); -#elif defined(CONFIG_PTE_64BIT) - /* Second case is 32-bit with 64-bit PTE. In this case, we - * can just store as long as we do the two halves in the right order - * with a barrier in between. This is possible because we take care, - * in the hash code, to pre-invalidate if the PTE was already hashed, - * which synchronizes us with any concurrent invalidation. 
- * In the percpu case, we also fallback to the simple update preserving - * the hash bits - */ - if (percpu) { - *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) - | (pte_val(pte) & ~_PAGE_HASHPTE)); - return; + asm volatile("stw%X0 %2,%0; eieio; stw%X1 %L2,%1" : + "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) : + "r" (pte) : "memory"); + } else { + pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, pte_val(pte), 0); } - if (pte_val(*ptep) & _PAGE_HASHPTE) - flush_hash_entry(mm, ptep, addr); - __asm__ __volatile__("\ - stw%X0 %2,%0\n\ - eieio\n\ - stw%X1 %L2,%1" - : "=m" (*ptep), "=m" (*((unsigned char *)ptep+4)) - : "r" (pte) : "memory"); - -#else - /* Third case is 32-bit hash table in UP mode, we need to preserve - * the _PAGE_HASHPTE bit since we may not have invalidated the previous - * translation in the hash yet (done in a subsequent flush_tlb_xxx()) - * and see we need to keep track that this PTE needs invalidating - */ - *ptep = __pte((pte_val(*ptep) & _PAGE_HASHPTE) - | (pte_val(pte) & ~_PAGE_HASHPTE)); -#endif } /* From ca13c130a43fe3ab625d22ada0a61e5c0b612229 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Aug 2023 14:40:49 +0200 Subject: [PATCH 079/135] powerpc/4xx: Remove WatchdogHandler() to fix no previous prototype error Building ppc40x_defconfig throws the following error: CC arch/powerpc/kernel/traps.o arch/powerpc/kernel/traps.c:2232:29: warning: no previous prototype for 'WatchdogHandler' [-Wmissing-prototypes] 2232 | void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) | ^~~~~~~~~~~~~~~ This function was imported by commit 14cf11af6cf6 ("powerpc: Merge enough to start building in arch/powerpc.") as a weak function but never defined and/or called outside traps.c As it has only one caller fold it inside its caller and remove it. 
Signed-off-by: Christophe Leroy Reviewed-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://msgid.link/38fe1078eb403eef74dc8f29387636fd7ecdf43c.1692276041.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/traps.c | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index f5ce282dc4b8b..eeff136b83d97 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -2225,21 +2225,10 @@ void __noreturn unrecoverable_exception(struct pt_regs *regs) } #if defined(CONFIG_BOOKE_WDT) || defined(CONFIG_40x) -/* - * Default handler for a Watchdog exception, - * spins until a reboot occurs - */ -void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) -{ - /* Generic WatchdogHandler, implement your own */ - mtspr(SPRN_TCR, mfspr(SPRN_TCR)&(~TCR_WIE)); - return; -} - DEFINE_INTERRUPT_HANDLER_NMI(WatchdogException) { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); - WatchdogHandler(regs); + mtspr(SPRN_TCR, mfspr(SPRN_TCR) & ~TCR_WIE); return 0; } #endif From 81554d10b22a211e4598a067a0f84b6e9e492669 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Aug 2023 15:44:26 +0200 Subject: [PATCH 080/135] powerpc/4xx: Remove pika_dtm_[un]register_shutdown() to fix no previous prototype ppc4xx_defconfig with W=1 results in: CC arch/powerpc/platforms/44x/warp.o arch/powerpc/platforms/44x/warp.c:369:5: error: no previous prototype for 'pika_dtm_register_shutdown' [-Werror=missing-prototypes] 369 | int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg) | ^~~~~~~~~~~~~~~~~~~~~~~~~~ arch/powerpc/platforms/44x/warp.c:374:5: error: no previous prototype for 'pika_dtm_unregister_shutdown' [-Werror=missing-prototypes] 374 | int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~ The functions were added by commit 4ebef31fa6e0 ("[POWERPC] PIKA Warp: Update platform code to support Rev B boards") 
Those functions are not used locally and although their symbols are exported they are not declared in any header file so they can't be used. Remove them, then remove the associated list as it will now remain empty hence becomes useless. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/830923f0e0375a14609204246d302c7476a8f948.1692279855.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/44x/warp.c | 55 ------------------------------- 1 file changed, 55 deletions(-) diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c index bfeb9bdc3258e..bf0188dcb9184 100644 --- a/arch/powerpc/platforms/44x/warp.c +++ b/arch/powerpc/platforms/44x/warp.c @@ -83,45 +83,8 @@ static int __init warp_post_info(void) #ifdef CONFIG_SENSORS_AD7414 -static LIST_HEAD(dtm_shutdown_list); static void __iomem *dtm_fpga; -struct dtm_shutdown { - struct list_head list; - void (*func)(void *arg); - void *arg; -}; - -int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg) -{ - struct dtm_shutdown *shutdown; - - shutdown = kmalloc(sizeof(struct dtm_shutdown), GFP_KERNEL); - if (shutdown == NULL) - return -ENOMEM; - - shutdown->func = func; - shutdown->arg = arg; - - list_add(&shutdown->list, &dtm_shutdown_list); - - return 0; -} - -int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg) -{ - struct dtm_shutdown *shutdown; - - list_for_each_entry(shutdown, &dtm_shutdown_list, list) - if (shutdown->func == func && shutdown->arg == arg) { - list_del(&shutdown->list); - kfree(shutdown); - return 0; - } - - return -EINVAL; -} - #define WARP_GREEN_LED 0 #define WARP_RED_LED 1 @@ -153,17 +116,12 @@ static struct platform_device warp_gpio_leds = { static irqreturn_t temp_isr(int irq, void *context) { - struct dtm_shutdown *shutdown; int value = 1; local_irq_disable(); gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0); - /* Run through the shutdown list.
*/ - list_for_each_entry(shutdown, &dtm_shutdown_list, list) - shutdown->func(shutdown->arg); - printk(KERN_EMERG "\n\nCritical Temperature Shutdown\n\n"); while (1) { @@ -366,19 +324,6 @@ machine_late_initcall(warp, pika_dtm_start); #else /* !CONFIG_SENSORS_AD7414 */ -int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg) -{ - return 0; -} - -int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg) -{ - return 0; -} - machine_late_initcall(warp, warp_post_info); #endif - -EXPORT_SYMBOL(pika_dtm_register_shutdown); -EXPORT_SYMBOL(pika_dtm_unregister_shutdown); From ed05c71a574235aec3418d2b5c94f24ae84cc2db Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Aug 2023 16:26:44 +0200 Subject: [PATCH 081/135] powerpc/47x: Remove early_init_mmu_47x() to fix no previous prototype 4xx/iss476-smp_defconfig leads to: CC arch/powerpc/mm/nohash/tlb.o arch/powerpc/mm/nohash/tlb.c:322:13: error: no previous prototype for 'early_init_mmu_47x' [-Werror=missing-prototypes] 322 | void __init early_init_mmu_47x(void) | ^~~~~~~~~~~~~~~~~~ early_init_mmu_47x() is used only at one place and only locally. Fold it into its only caller and remove it. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/0a667b7c2e05d3cf41ecd38f33cc334083a61c8d.1692282396.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/nohash/tlb.c | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c index a903b308acc54..5ffa0af4328af 100644 --- a/arch/powerpc/mm/nohash/tlb.c +++ b/arch/powerpc/mm/nohash/tlb.c @@ -318,17 +318,6 @@ EXPORT_SYMBOL(flush_tlb_page); #endif /* CONFIG_SMP */ -#ifdef CONFIG_PPC_47x -void __init early_init_mmu_47x(void) -{ -#ifdef CONFIG_SMP - unsigned long root = of_get_flat_dt_root(); - if (of_get_flat_dt_prop(root, "cooperative-partition", NULL)) - mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST); -#endif /* CONFIG_SMP */ -} -#endif /* CONFIG_PPC_47x */ - /* * Flush kernel TLB entries in the given range */ @@ -746,8 +735,10 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base, #else /* ! CONFIG_PPC64 */ void __init early_init_mmu(void) { -#ifdef CONFIG_PPC_47x - early_init_mmu_47x(); -#endif + unsigned long root = of_get_flat_dt_root(); + + if (IS_ENABLED(CONFIG_PPC_47x) && IS_ENABLED(CONFIG_SMP) && + of_get_flat_dt_prop(root, "cooperative-partition", NULL)) + mmu_clear_feature(MMU_FTR_USE_TLBIVAX_BCAST); } #endif /* CONFIG_PPC64 */ From b27c1a0a4e62af1fd9d2688bf8156a5d546e4227 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Aug 2023 16:25:49 +0200 Subject: [PATCH 082/135] powerpc/47x: Add prototype for mmu_init_secondary() A W=1 build of 44x/iss476-smp_defconfig gives: arch/powerpc/mm/nohash/44x.c:220:13: error: no previous prototype for 'mmu_init_secondary' [-Werror=missing-prototypes] 220 | void __init mmu_init_secondary(int cpu) | ^~~~~~~~~~~~~~~~~~ That function is called from head_4xx.S Add a prototype in mmu_decl.h Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: 
https://msgid.link/e89d9927c926044e54fd056a849785f526c6414f.1692282340.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/mmu_decl.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index c6dccb4f06dca..7f9ff0640124a 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -110,6 +110,7 @@ extern void MMU_init_hw(void); void MMU_init_hw_patch(void); unsigned long mmu_mapin_ram(unsigned long base, unsigned long top); #endif +void mmu_init_secondary(int cpu); #ifdef CONFIG_PPC_E500 extern unsigned long map_mem_in_cams(unsigned long ram, int max_cam_idx, From 7dac7cf1b407605c15d85ae885377ba0560ca4cd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 17 Aug 2023 16:27:23 +0200 Subject: [PATCH 083/135] powerpc/4xx: Add missing includes to fix no previous prototype errors A W=1 build of ppc40x_defconfig throws the following errors: CC arch/powerpc/platforms/4xx/uic.o arch/powerpc/platforms/4xx/uic.c:274:13: warning: no previous prototype for 'uic_init_tree' [-Wmissing-prototypes] 274 | void __init uic_init_tree(void) | ^~~~~~~~~~~~~ arch/powerpc/platforms/4xx/uic.c:319:14: warning: no previous prototype for 'uic_get_irq' [-Wmissing-prototypes] 319 | unsigned int uic_get_irq(void) | ^~~~~~~~~~~ CC arch/powerpc/platforms/4xx/machine_check.o CC arch/powerpc/platforms/4xx/soc.o arch/powerpc/platforms/4xx/soc.c:193:6: warning: no previous prototype for 'ppc4xx_reset_system' [-Wmissing-prototypes] 193 | void ppc4xx_reset_system(char *cmd) | ^~~~~~~~~~~~~~~~~~~ Add missing includes to get the missing prototypes. 
Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/c8253017e355638132737ff47936e290df8738d1.1692282432.git.christophe.leroy@csgroup.eu --- arch/powerpc/platforms/4xx/soc.c | 1 + arch/powerpc/platforms/4xx/uic.c | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/powerpc/platforms/4xx/soc.c b/arch/powerpc/platforms/4xx/soc.c index f91df08278777..b2d940437a662 100644 --- a/arch/powerpc/platforms/4xx/soc.c +++ b/arch/powerpc/platforms/4xx/soc.c @@ -21,6 +21,7 @@ #include #include #include +#include static u32 dcrbase_l2c; diff --git a/arch/powerpc/platforms/4xx/uic.c b/arch/powerpc/platforms/4xx/uic.c index d667ad039bd3a..e3e148b9dd183 100644 --- a/arch/powerpc/platforms/4xx/uic.c +++ b/arch/powerpc/platforms/4xx/uic.c @@ -24,6 +24,7 @@ #include #include #include +#include #define NR_UIC_INTS 32 From d1eb75e0dfed80d2d85b664e28a39f65b290ab55 Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Tue, 4 Jul 2023 10:37:15 +0530 Subject: [PATCH 084/135] powerpc/fadump: reset dump area size if fadump memory reserve fails In case fadump_reserve_mem() fails to reserve memory, the reserve_dump_area_size variable will retain the reserve area size. This will lead to /sys/kernel/fadump/mem_reserved node displaying an incorrect memory reserved by fadump. To fix this problem, reserve dump area size variable is set to 0 if fadump failed to reserve memory. 
Fixes: 8255da95e545 ("powerpc/fadump: release all the memory above boot memory size") Signed-off-by: Sourabh Jain Acked-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Link: https://msgid.link/20230704050715.203581-1-sourabhjain@linux.ibm.com --- arch/powerpc/kernel/fadump.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index ea0a073abd969..3ff2da7b120b5 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -654,6 +654,7 @@ int __init fadump_reserve_mem(void) return ret; error_out: fw_dump.fadump_enabled = 0; + fw_dump.reserve_dump_area_size = 0; return 0; } From 455d3d38ef9d5f69c504d1af5fa2359563ea4148 Mon Sep 17 00:00:00 2001 From: Trevor Woerner Date: Thu, 17 Aug 2023 07:50:14 -0400 Subject: [PATCH 085/135] powerpc/configs: Drop CONFIG_IP_NF_TARGET_CLUSTERIP Drop CONFIG_IP_NF_TARGET_CLUSTERIP as it was removed in commit 9db5d918e2c0 ("netfilter: ip_tables: remove clusterip target"). Signed-off-by: Trevor Woerner Signed-off-by: Michael Ellerman Link: https://msgid.link/20230817115017.35663-5-twoerner@gmail.com --- arch/powerpc/configs/ppc6xx_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig index f21170b8fa11e..98c32f7414789 100644 --- a/arch/powerpc/configs/ppc6xx_defconfig +++ b/arch/powerpc/configs/ppc6xx_defconfig @@ -183,7 +183,6 @@ CONFIG_IP_NF_MATCH_TTL=m CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m CONFIG_IP_NF_TARGET_ECN=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IP_NF_RAW=m From 4d15721177d539d743fcf31d7bb376fb3b81aeb6 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Tue, 1 Aug 2023 10:14:46 +0530 Subject: [PATCH 086/135] powerpc/mm: Cleanup memory block size probing Parse the device tree in early init to find the memory block size to be used by the kernel. 
Consolidate the memory block size device tree parsing to one helper and use that on both powernv and pseries. We still want to use machine-specific callback because on all machine types other than powernv and pseries we continue to return MIN_MEMORY_BLOCK_SIZE. pseries_memory_block_size used to look for the second memory block (memory@x) to determine the memory_block_size value. This patch changed that to look at all memory blocks and make sure we can map them all correctly using the computed memory block size value. Add workaround to force 256MB memory block size if device driver managed memory such as GPU memory is present. This helps to add GPU memory that is not aligned to 1G. Co-developed-by: Reza Arbab Signed-off-by: Reza Arbab Signed-off-by: "Aneesh Kumar K.V" Signed-off-by: Michael Ellerman Link: https://msgid.link/20230801044447.11275-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/book3s/64/mmu.h | 5 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 65 +--------- arch/powerpc/mm/init_64.c | 112 ++++++++++++++++++ arch/powerpc/platforms/powernv/setup.c | 10 +- .../platforms/pseries/hotplug-memory.c | 60 +--------- arch/powerpc/platforms/pseries/pseries.h | 2 - arch/powerpc/platforms/pseries/setup.c | 7 ++ 7 files changed, 126 insertions(+), 135 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 5cf0e9c953b32..fedbc5d381917 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -71,10 +71,7 @@ extern unsigned int mmu_pid_bits; /* Base PID to allocate from */ extern unsigned int mmu_base_pid; -/* - * memory block size used with radix translation. 
- */ -extern unsigned long __ro_after_init radix_mem_block_size; +extern unsigned long __ro_after_init memory_block_size; #define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) #define PRTB_ENTRIES (1ul << mmu_pid_bits) diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index e7ea492ac510a..b5102491b50fa 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -37,7 +37,6 @@ #include unsigned int mmu_base_pid; -unsigned long radix_mem_block_size __ro_after_init; static __ref void *early_alloc_pgtable(unsigned long size, int nid, unsigned long region_start, unsigned long region_end) @@ -300,7 +299,7 @@ static int __meminit create_physical_mapping(unsigned long start, bool prev_exec, exec = false; pgprot_t prot; int psize; - unsigned long max_mapping_size = radix_mem_block_size; + unsigned long max_mapping_size = memory_block_size; if (debug_pagealloc_enabled_or_kfence()) max_mapping_size = PAGE_SIZE; @@ -502,58 +501,6 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, return 1; } -#ifdef CONFIG_MEMORY_HOTPLUG -static int __init probe_memory_block_size(unsigned long node, const char *uname, int - depth, void *data) -{ - unsigned long *mem_block_size = (unsigned long *)data; - const __be32 *prop; - int len; - - if (depth != 1) - return 0; - - if (strcmp(uname, "ibm,dynamic-reconfiguration-memory")) - return 0; - - prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len); - - if (!prop || len < dt_root_size_cells * sizeof(__be32)) - /* - * Nothing in the device tree - */ - *mem_block_size = MIN_MEMORY_BLOCK_SIZE; - else - *mem_block_size = of_read_number(prop, dt_root_size_cells); - return 1; -} - -static unsigned long __init radix_memory_block_size(void) -{ - unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE; - - /* - * OPAL firmware feature is set by now. Hence we are ok - * to test OPAL feature. 
- */ - if (firmware_has_feature(FW_FEATURE_OPAL)) - mem_block_size = 1UL * 1024 * 1024 * 1024; - else - of_scan_flat_dt(probe_memory_block_size, &mem_block_size); - - return mem_block_size; -} - -#else /* CONFIG_MEMORY_HOTPLUG */ - -static unsigned long __init radix_memory_block_size(void) -{ - return 1UL * 1024 * 1024 * 1024; -} - -#endif /* CONFIG_MEMORY_HOTPLUG */ - - void __init radix__early_init_devtree(void) { int rc; @@ -577,16 +524,6 @@ void __init radix__early_init_devtree(void) mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize = psize_to_rpti_pgsize(MMU_PAGE_64K); } - - /* - * Max mapping size used when mapping pages. We don't use - * ppc_md.memory_block_size() here because this get called - * early and we don't have machine probe called yet. Also - * the pseries implementation only check for ibm,lmb-size. - * All hypervisor supporting radix do expose that device - * tree node. - */ - radix_mem_block_size = radix_memory_block_size(); return; } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index fe1b83020e0df..d74d4a441616a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #include @@ -471,6 +472,115 @@ static int __init dt_scan_mmu_pid_width(unsigned long node, return 1; } +static void update_memory_block_size(unsigned long *block_size, unsigned long mem_size) +{ + unsigned long section_size = 1UL << SECTION_SIZE_BITS; + + for (; *block_size > section_size; *block_size >>= 2) { + + if ((mem_size & *block_size) == 0) + break; + } +} + +static int __init probe_memory_block_size(unsigned long node, const char *uname, int + depth, void *data) +{ + const char *type; + unsigned long *block_size = (unsigned long *)data; + const __be32 *reg, *endp; + int l; + + if (depth != 1) + return 0; + /* + * If we have dynamic-reconfiguration-memory node, use the + * lmb value. 
+ */ + if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) { + + const __be32 *prop; + + prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &l); + + if (!prop || l < dt_root_size_cells * sizeof(__be32)) + /* + * Nothing in the device tree + */ + *block_size = MIN_MEMORY_BLOCK_SIZE; + else + *block_size = of_read_number(prop, dt_root_size_cells); + /* + * We have found the final value. Don't probe further. + */ + return 1; + } + /* + * Find all the device tree nodes of memory type and make sure + * the area can be mapped using the memory block size value + * we end up using. We start with 1G value and keep reducing + * it such that we can map the entire area using memory_block_size. + * This will be used on powernv and older pseries that don't + * have ibm,lmb-size node. + * For ex: with P5 we can end up with + * memory@0 -> 128MB + * memory@128M -> 64M + * This will end up using 64MB memory block size value. + */ + type = of_get_flat_dt_prop(node, "device_type", NULL); + if (type == NULL || strcmp(type, "memory") != 0) + return 0; + + reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l); + if (!reg) + reg = of_get_flat_dt_prop(node, "reg", &l); + if (!reg) + return 0; + + endp = reg + (l / sizeof(__be32)); + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { + const char *compatible; + u64 size; + + dt_mem_next_cell(dt_root_addr_cells, &reg); + size = dt_mem_next_cell(dt_root_size_cells, &reg); + + if (size) { + update_memory_block_size(block_size, size); + continue; + } + /* + * ibm,coherent-device-memory with linux,usable-memory = 0 + * Force 256MiB block size. Work around for GPUs on P9 PowerNV + * linux,usable-memory == 0 implies driver managed memory and + * we can't use large memory block size due to hotplug/unplug + * limitations. 
+ */ + compatible = of_get_flat_dt_prop(node, "compatible", NULL); + if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) { + *block_size = SZ_256M; + return 1; + } + } + /* continue looking for other memory device types */ + return 0; +} + +/* + * start with 1G memory block size. Early init will + * fix this with correct value. + */ +unsigned long memory_block_size __ro_after_init = 1UL << 30; +static void __init early_init_memory_block_size(void) +{ + /* + * We need to do memory_block_size probe early so that + * radix__early_init_mmu() can use this as limit for + * mapping page size. + */ + of_scan_flat_dt(probe_memory_block_size, &memory_block_size); +} + void __init mmu_early_init_devtree(void) { bool hvmode = !!(mfmsr() & MSR_HV); @@ -504,6 +614,8 @@ void __init mmu_early_init_devtree(void) if (!hvmode) early_check_vec5(); + early_init_memory_block_size(); + if (early_radix_enabled()) { radix__early_init_devtree(); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 5e9c6b55809fe..4dbb47ddbdcc4 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -482,15 +482,7 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) #ifdef CONFIG_MEMORY_HOTPLUG static unsigned long pnv_memory_block_size(void) { - /* - * We map the kernel linear region with 1GB large pages on radix. For - * memory hot unplug to work our memory block size must be at least - * this size. 
- */ - if (radix_enabled()) - return radix_mem_block_size; - else - return 256UL * 1024 * 1024; + return memory_block_size; } #endif diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9c62c2c3b3d0b..1333d9ab76215 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -21,54 +21,6 @@ #include #include "pseries.h" -unsigned long pseries_memory_block_size(void) -{ - struct device_node *np; - u64 memblock_size = MIN_MEMORY_BLOCK_SIZE; - struct resource r; - - np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory"); - if (np) { - int len; - int size_cells; - const __be32 *prop; - - size_cells = of_n_size_cells(np); - - prop = of_get_property(np, "ibm,lmb-size", &len); - if (prop && len >= size_cells * sizeof(__be32)) - memblock_size = of_read_number(prop, size_cells); - of_node_put(np); - - } else if (machine_is(pseries)) { - /* This fallback really only applies to pseries */ - unsigned int memzero_size = 0; - - np = of_find_node_by_path("/memory@0"); - if (np) { - if (!of_address_to_resource(np, 0, &r)) - memzero_size = resource_size(&r); - of_node_put(np); - } - - if (memzero_size) { - /* We now know the size of memory@0, use this to find - * the first memoryblock and get its size. 
- */ - char buf[64]; - - sprintf(buf, "/memory@%x", memzero_size); - np = of_find_node_by_path(buf); - if (np) { - if (!of_address_to_resource(np, 0, &r)) - memblock_size = resource_size(&r); - of_node_put(np); - } - } - } - return memblock_size; -} - static void dlpar_free_property(struct property *prop) { kfree(prop->name); @@ -283,7 +235,7 @@ static int dlpar_offline_lmb(struct drmem_lmb *lmb) static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size) { - unsigned long block_sz, start_pfn; + unsigned long start_pfn; int sections_per_block; int i; @@ -294,8 +246,7 @@ static int pseries_remove_memblock(unsigned long base, unsigned long memblock_si if (!pfn_valid(start_pfn)) goto out; - block_sz = pseries_memory_block_size(); - sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + sections_per_block = memory_block_size / MIN_MEMORY_BLOCK_SIZE; for (i = 0; i < sections_per_block; i++) { __remove_memory(base, MIN_MEMORY_BLOCK_SIZE); @@ -354,7 +305,6 @@ static int dlpar_add_lmb(struct drmem_lmb *); static int dlpar_remove_lmb(struct drmem_lmb *lmb) { struct memory_block *mem_block; - unsigned long block_sz; int rc; if (!lmb_is_removable(lmb)) @@ -370,13 +320,11 @@ static int dlpar_remove_lmb(struct drmem_lmb *lmb) return rc; } - block_sz = pseries_memory_block_size(); - - __remove_memory(lmb->base_addr, block_sz); + __remove_memory(lmb->base_addr, memory_block_size); put_device(&mem_block->dev); /* Update memory regions for memory remove */ - memblock_remove(lmb->base_addr, block_sz); + memblock_remove(lmb->base_addr, memory_block_size); invalidate_lmb_associativity_index(lmb); lmb->flags &= ~DRCONF_MEM_ASSIGNED; diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index f8893ba46e83a..8376f03f932a4 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -92,8 +92,6 @@ extern struct pci_controller_ops pseries_pci_controller_ops; int 
pseries_msi_allocate_domains(struct pci_controller *phb); void pseries_msi_free_domains(struct pci_controller *phb); -unsigned long pseries_memory_block_size(void); - extern int CMO_PrPSP; extern int CMO_SecPSP; extern unsigned long CMO_PageSize; diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 41451b76c6e51..bb0a9aeb50f90 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -1118,6 +1118,13 @@ static int pSeries_pci_probe_mode(struct pci_bus *bus) return PCI_PROBE_NORMAL; } +#ifdef CONFIG_MEMORY_HOTPLUG +static unsigned long pseries_memory_block_size(void) +{ + return memory_block_size; +} +#endif + struct pci_controller_ops pseries_pci_controller_ops = { .probe_mode = pSeries_pci_probe_mode, #ifdef CONFIG_SPAPR_TCE_IOMMU From 7f3c5d099b6f8452dc4dcfe4179ea48e6a13d0eb Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Thu, 17 Aug 2023 11:11:56 -0700 Subject: [PATCH 087/135] Revert "powerpc/xmon: Relax frame size for clang" This reverts commit 9c87156cce5a63735d1218f0096a65c50a7a32aa. I have not been able to reproduce the reported -Wframe-larger-than= warning (or disassembly) with clang-11 or clang-18. I don't know precisely when this was fixed in llvm, but it may be time to revert this. 
Closes: https://github.com/ClangBuiltLinux/linux/issues/252 Signed-off-by: Nick Desaulniers Reviewed-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://msgid.link/20230817-ppc_xmon-v1-1-8cc2d51b9995@google.com --- arch/powerpc/xmon/Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index d334de392e6ca..7705aa74a24d3 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -10,12 +10,6 @@ KCSAN_SANITIZE := n # Disable ftrace for the entire directory ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE) -ifdef CONFIG_CC_IS_CLANG -# clang stores addresses on the stack causing the frame size to blow -# out. See https://github.com/ClangBuiltLinux/linux/issues/252 -KBUILD_CFLAGS += -Wframe-larger-than=4096 -endif - ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) obj-y += xmon.o nonstdio.o spr_access.o xmon_bpts.o From 89c9ce1c99df553029fc4503506ff5a1793f3eaf Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Thu, 17 Aug 2023 18:24:08 +0200 Subject: [PATCH 088/135] powerpc: Move DMA64_PROPNAME define to a header Avoid redefining the same value in multiple source. 
Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://msgid.link/20230817162411.429-1-msuchanek@suse.de --- arch/powerpc/include/asm/iommu.h | 3 +++ arch/powerpc/kexec/file_load_64.c | 5 +---- arch/powerpc/platforms/pseries/iommu.c | 2 -- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h index 34e14dfd8e042..0266959435505 100644 --- a/arch/powerpc/include/asm/iommu.h +++ b/arch/powerpc/include/asm/iommu.h @@ -28,6 +28,9 @@ #define IOMMU_PAGE_MASK(tblptr) (~((1 << (tblptr)->it_page_shift) - 1)) #define IOMMU_PAGE_ALIGN(addr, tblptr) ALIGN(addr, IOMMU_PAGE_SIZE(tblptr)) +#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" +#define DMA64_PROPNAME "linux,dma64-ddr-window-info" + /* Boot time flags */ extern int iommu_is_off; extern int iommu_force_on; diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 17534daa3c48c..a3de5369d22c2 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -1208,8 +1209,6 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, if (ret < 0) goto out; -#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" -#define DMA64_PROPNAME "linux,dma64-ddr-window-info" ret = update_pci_dma_nodes(fdt, DIRECT64_PROPNAME); if (ret < 0) goto out; @@ -1217,8 +1216,6 @@ int setup_new_fdt_ppc64(const struct kimage *image, void *fdt, ret = update_pci_dma_nodes(fdt, DMA64_PROPNAME); if (ret < 0) goto out; -#undef DMA64_PROPNAME -#undef DIRECT64_PROPNAME /* Update memory reserve map */ ret = get_reserved_memory_ranges(&rmem); diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index d593a7227dc91..16d93b580f61f 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -395,8 +395,6 @@ static LIST_HEAD(dma_win_list); 
static DEFINE_SPINLOCK(dma_win_list_lock); /* protects initializing window twice for same device */ static DEFINE_MUTEX(dma_win_init_mutex); -#define DIRECT64_PROPNAME "linux,direct64-ddr-window-info" -#define DMA64_PROPNAME "linux,dma64-ddr-window-info" static int tce_clearrange_multi_pSeriesLP(unsigned long start_pfn, unsigned long num_pfn, const void *arg) From ae3a8cc292d01a1558dff837bb485712dfaeb9c6 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 17 Aug 2023 11:21:52 -0500 Subject: [PATCH 089/135] powerpc/selftests: sort mm/.gitignore, add exec_prot Add exec_prot to mm/.gitignore and sort the result. Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20230817-powerpc-selftest-misc-v1-1-a84cc1ef78b2@linux.ibm.com --- tools/testing/selftests/powerpc/mm/.gitignore | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 4e1a294eec355..0df1a3afc5e2e 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -1,15 +1,16 @@ # SPDX-License-Identifier: GPL-2.0-only +bad_accesses +exec_prot hugetlb_vs_thp_test -subpage_prot -tempfile -prot_sao -segv_errors -wild_bctr large_vm_fork_separation -bad_accesses -tlbie_test +large_vm_gpr_corruption pkey_exec_prot pkey_siginfo +prot_sao +segv_errors stack_expansion_ldst stack_expansion_signal -large_vm_gpr_corruption +subpage_prot +tempfile +tlbie_test +wild_bctr From 701ca3657d5d489add2bedce0c31938e521c7913 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Thu, 17 Aug 2023 11:21:53 -0500 Subject: [PATCH 090/135] selftests/powerpc: add const qualification where possible Various char * parameters in the common powerpc selftest APIs can be const. 
Signed-off-by: Nathan Lynch Signed-off-by: Michael Ellerman Link: https://msgid.link/20230817-powerpc-selftest-misc-v1-2-a84cc1ef78b2@linux.ibm.com --- tools/testing/selftests/powerpc/harness.c | 4 ++-- .../testing/selftests/powerpc/include/subunit.h | 16 ++++++++-------- tools/testing/selftests/powerpc/include/utils.h | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 0ad4f12b3d43a..5876220d8ff23 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -24,7 +24,7 @@ /* Setting timeout to -1 disables the alarm */ static uint64_t timeout = 120; -int run_test(int (test_function)(void), char *name) +int run_test(int (test_function)(void), const char *name) { bool terminated; int rc, status; @@ -101,7 +101,7 @@ void test_harness_set_timeout(uint64_t time) timeout = time; } -int test_harness(int (test_function)(void), char *name) +int test_harness(int (test_function)(void), const char *name) { int rc; diff --git a/tools/testing/selftests/powerpc/include/subunit.h b/tools/testing/selftests/powerpc/include/subunit.h index 068d55fdf80f9..b0bb774617c9c 100644 --- a/tools/testing/selftests/powerpc/include/subunit.h +++ b/tools/testing/selftests/powerpc/include/subunit.h @@ -6,37 +6,37 @@ #ifndef _SELFTESTS_POWERPC_SUBUNIT_H #define _SELFTESTS_POWERPC_SUBUNIT_H -static inline void test_start(char *name) +static inline void test_start(const char *name) { printf("test: %s\n", name); } -static inline void test_failure_detail(char *name, char *detail) +static inline void test_failure_detail(const char *name, const char *detail) { printf("failure: %s [%s]\n", name, detail); } -static inline void test_failure(char *name) +static inline void test_failure(const char *name) { printf("failure: %s\n", name); } -static inline void test_error(char *name) +static inline void test_error(const char *name) { printf("error: %s\n", 
name); } -static inline void test_skip(char *name) +static inline void test_skip(const char *name) { printf("skip: %s\n", name); } -static inline void test_success(char *name) +static inline void test_success(const char *name) { printf("success: %s\n", name); } -static inline void test_finish(char *name, int status) +static inline void test_finish(const char *name, int status) { if (status) test_failure(name); @@ -44,7 +44,7 @@ static inline void test_finish(char *name, int status) test_success(name); } -static inline void test_set_git_version(char *value) +static inline void test_set_git_version(const char *value) { printf("tags: git_version:%s\n", value); } diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 36c30c6114573..66d7b2368dd48 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -32,7 +32,7 @@ typedef uint16_t u16; typedef uint8_t u8; void test_harness_set_timeout(uint64_t time); -int test_harness(int (test_function)(void), char *name); +int test_harness(int (test_function)(void), const char *name); int read_auxv(char *buf, ssize_t buf_size); void *find_auxv_entry(int type, char *auxv); From b949ee6801f4a47ae0e02845242c99af351c4d26 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Fri, 9 Jun 2023 12:44:04 +0530 Subject: [PATCH 091/135] powerpc/fadump: invoke ibm,os-term with rtas_call_unlocked() Invoke ibm,os-term call with rtas_call_unlocked(), without using the RTAS spinlock, to avoid deadlock in the unlikely event of a machine crash while making an RTAS call. 
Signed-off-by: Hari Bathini Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Link: https://msgid.link/20230609071404.425529-1-hbathini@linux.ibm.com --- arch/powerpc/kernel/rtas.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index c087eeee320ff..a8192e5b1a5f5 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1587,6 +1587,7 @@ static bool ibm_extended_os_term; void rtas_os_term(char *str) { s32 token = rtas_function_token(RTAS_FN_IBM_OS_TERM); + static struct rtas_args args; int status; /* @@ -1607,7 +1608,8 @@ void rtas_os_term(char *str) * schedules. */ do { - status = rtas_call(token, 1, 1, NULL, __pa(rtas_os_term_buf)); + rtas_call_unlocked(&args, token, 1, 1, NULL, __pa(rtas_os_term_buf)); + status = be32_to_cpu(args.rets[0]); } while (rtas_busy_delay_time(status)); if (status != 0) From 0ceef6e99cc3a57f33b84dafc6df5dfb3b28278d Mon Sep 17 00:00:00 2001 From: Vaibhav Jain Date: Fri, 18 Aug 2023 10:37:37 +0530 Subject: [PATCH 092/135] powerpc/idle: Add support for nohlt This patch enables config option GENERIC_IDLE_POLL_SETUP for arch powerpc. This adds support for kernel param 'nohlt'. Powerpc kernel also supports another kernel boot-time param called 'powersave' which can also be used to disable all cpu idle-states and forces CPU to an idle-loop similar to what cpu_idle_poll() does. This patch however makes powerpc kernel-parameters better aligned to the generic boot-time parameters. 
Signed-off-by: Vaibhav Jain Signed-off-by: Michael Ellerman Link: https://msgid.link/20230818050739.827851-1-vaibhav@linux.ibm.com --- Documentation/admin-guide/kernel-parameters.txt | 2 +- arch/powerpc/Kconfig | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index cac4643867792..fd9f21b495821 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3717,7 +3717,7 @@ nohibernate [HIBERNATION] Disable hibernation and resume. - nohlt [ARM,ARM64,MICROBLAZE,MIPS,SH] Forces the kernel to + nohlt [ARM,ARM64,MICROBLAZE,MIPS,PPC,SH] Forces the kernel to busy wait in do_idle() and not use the arch_cpu_idle() implementation; requires CONFIG_GENERIC_IDLE_POLL_SETUP to be effective. This is useful on platforms where the diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b7a4cb5d9409a..cb0a89946c4e6 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -193,6 +193,7 @@ config PPC select GENERIC_CPU_VULNERABILITIES if PPC_BARRIER_NOSPEC select GENERIC_EARLY_IOREMAP select GENERIC_GETTIMEOFDAY + select GENERIC_IDLE_POLL_SETUP select GENERIC_IRQ_SHOW select GENERIC_IRQ_SHOW_LEVEL select GENERIC_PCI_IOMAP if PCI From e160bf64e2d3df7bf83ed41d09390a32490be6c5 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Fri, 18 Aug 2023 16:59:07 +0530 Subject: [PATCH 093/135] powerpc/rtas: export rtas_error_rc() for reuse. Also, #define descriptive names for common rtas return codes and use it instead of numeric values. 
Signed-off-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Link: https://msgid.link/169235811556.193557.1023625262204809514.stgit@jupiter --- arch/powerpc/include/asm/rtas.h | 3 +++ arch/powerpc/kernel/rtas.c | 41 +++++++++++++++++---------------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 3abe15ac79db1..c697c3c746946 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -202,7 +202,9 @@ typedef struct { #define RTAS_USER_REGION_SIZE (64 * 1024) /* RTAS return status codes */ +#define RTAS_HARDWARE_ERROR -1 /* Hardware Error */ #define RTAS_BUSY -2 /* RTAS Busy */ +#define RTAS_INVALID_PARAMETER -3 /* Invalid indicator/domain/sensor etc. */ #define RTAS_EXTENDED_DELAY_MIN 9900 #define RTAS_EXTENDED_DELAY_MAX 9905 @@ -425,6 +427,7 @@ extern int rtas_set_indicator(int indicator, int index, int new_value); extern int rtas_set_indicator_fast(int indicator, int index, int new_value); extern void rtas_progress(char *s, unsigned short hex); int rtas_ibm_suspend_me(int *fw_status); +int rtas_error_rc(int rtas_rc); struct rtc_time; extern time64_t rtas_get_boot_time(void); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index a8192e5b1a5f5..eddc031c4b95f 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -1330,33 +1330,34 @@ bool __ref rtas_busy_delay(int status) } EXPORT_SYMBOL_GPL(rtas_busy_delay); -static int rtas_error_rc(int rtas_rc) +int rtas_error_rc(int rtas_rc) { int rc; switch (rtas_rc) { - case -1: /* Hardware Error */ - rc = -EIO; - break; - case -3: /* Bad indicator/domain/etc */ - rc = -EINVAL; - break; - case -9000: /* Isolation error */ - rc = -EFAULT; - break; - case -9001: /* Outstanding TCE/PTE */ - rc = -EEXIST; - break; - case -9002: /* No usable slot */ - rc = -ENODEV; - break; - default: - pr_err("%s: unexpected error %d\n", __func__, rtas_rc); - rc = -ERANGE; - break; + case 
RTAS_HARDWARE_ERROR: /* Hardware Error */ + rc = -EIO; + break; + case RTAS_INVALID_PARAMETER: /* Bad indicator/domain/etc */ + rc = -EINVAL; + break; + case -9000: /* Isolation error */ + rc = -EFAULT; + break; + case -9001: /* Outstanding TCE/PTE */ + rc = -EEXIST; + break; + case -9002: /* No usable slot */ + rc = -ENODEV; + break; + default: + pr_err("%s: unexpected error %d\n", __func__, rtas_rc); + rc = -ERANGE; + break; } return rc; } +EXPORT_SYMBOL_GPL(rtas_error_rc); int rtas_get_power_level(int powerdomain, int *level) { From 77583f77ed9b1452ac62caebf09b2206da10bbf9 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Fri, 18 Aug 2023 16:59:43 +0530 Subject: [PATCH 094/135] PCI: rpaphp: Error out on busy status from get-sensor-state When certain PHB HW failure causes pHyp to recover PHB, it marks the PE state as temporarily unavailable until recovery is complete. This also triggers an EEH handler in Linux which needs to notify drivers, and perform recovery. But before notifying the driver about the PCI error it uses get_adapter_status()->rpaphp_get_sensor_state()->rtas_call(get-sensor-state) operation of the hotplug_slot to determine if the slot contains a device or not. If the slot is empty, the recovery is skipped entirely. eeh_event_handler() ->eeh_handle_normal_event() ->eeh_slot_presence_check() ->get_adapter_status() ->rpaphp_get_sensor_state() ->rtas_get_sensor() ->rtas_call(get-sensor-state) However on certain PHB failures, the RTAS call rtas_call(get-sensor-state) returns extended busy error (9902) until PHB is recovered by pHyp. Once PHB is recovered, the rtas_call(get-sensor-state) returns success with correct presence status. The RTAS call interface rtas_get_sensor() loops over the RTAS call on extended delay return code (9902) until the return value is either success (0) or error (-1). This causes the EEH handler to get stuck for ~6 seconds before it could notify that the PCI error has been detected and stop any active operations. 
Hence, with I/O traffic running, during these 6 seconds the network driver continues its operation and hits a timeout (netdev watchdog). ------------ [52732.244731] DEBUG: ibm_read_slot_reset_state2() [52732.244762] DEBUG: ret = 0, rets[0]=5, rets[1]=1, rets[2]=4000, rets[3]=> [52732.244798] DEBUG: in eeh_slot_presence_check [52732.244804] DEBUG: error state check [52732.244807] DEBUG: Is slot hotpluggable [52732.244810] DEBUG: hotpluggable ops ? [52732.244953] DEBUG: Calling ops->get_adapter_status [52732.244958] DEBUG: calling rpaphp_get_sensor_state [52736.564262] ------------[ cut here ]------------ [52736.564299] NETDEV WATCHDOG: enP64p1s0f3 (tg3): transmit queue 0 timed o> [52736.564324] WARNING: CPU: 1442 PID: 0 at net/sched/sch_generic.c:478 dev> [...] [52736.564505] NIP [c000000000c32368] dev_watchdog+0x438/0x440 [52736.564513] LR [c000000000c32364] dev_watchdog+0x434/0x440 ------------ On timeouts, the network driver starts dumping debug information to the console (e.g. the bnx2x driver calls bnx2x_panic_dump()) and goes into its recovery path while pHyp is still recovering the PHB. As part of recovery, the driver tries to reset the device, which keeps failing since every PCI read/write returns ff's. When EEH recovery then kicks in, the driver is unable to recover the device. This impacts the ssh connection and leads to the system being inaccessible. To get the NIC working again, a reboot is needed or the I/O adapter must be re-assigned from the HMC. [ 9531.168587] EEH: Beginning: 'slot_reset' [ 9531.168601] PCI 0013:01:00.0#10000: EEH: Invoking bnx2x->slot_reset() [...] [ 9614.110094] bnx2x: [bnx2x_func_stop:9129(enP19p1s0f0)]FUNC_STOP ramrod failed. Running a dry transaction [ 9614.110300] bnx2x: [bnx2x_igu_int_disable:902(enP19p1s0f0)]BUG! Proper val not read from IGU! [ 9629.178067] bnx2x: [bnx2x_fw_command:3055(enP19p1s0f0)]FW failed to respond!
[ 9629.178085] bnx2x 0013:01:00.0 enP19p1s0f0: bc 7.10.4 [ 9629.178091] bnx2x: [bnx2x_fw_dump_lvl:789(enP19p1s0f0)]Cannot dump MCP info while in PCI error [ 9644.241813] bnx2x: [bnx2x_io_slot_reset:14245(enP19p1s0f0)]IO slot reset --> driver unload [...] [ 9644.241819] PCI 0013:01:00.0#10000: EEH: bnx2x driver reports: 'disconnect' [ 9644.241823] PCI 0013:01:00.1#10000: EEH: Invoking bnx2x->slot_reset() [ 9644.241827] bnx2x: [bnx2x_io_slot_reset:14229(enP19p1s0f1)]IO slot reset initializing... [ 9644.241916] bnx2x 0013:01:00.1: enabling device (0140 -> 0142) [ 9644.258604] bnx2x: [bnx2x_io_slot_reset:14245(enP19p1s0f1)]IO slot reset --> driver unload [ 9644.258612] PCI 0013:01:00.1#10000: EEH: bnx2x driver reports: 'disconnect' [ 9644.258615] EEH: Finished:'slot_reset' with aggregate recovery state:'disconnect' [ 9644.258620] EEH: Unable to recover from failure from PHB#13-PE#10000. [ 9644.261811] EEH: Beginning: 'error_detected(permanent failure)' [...] [ 9644.261823] EEH: Finished:'error_detected(permanent failure)' Hence, it is important to inform the driver about the PCI error as early as possible, so that the driver is aware of the error and waits for the EEH handler's next action for successful recovery. The current implementation uses the rtas_get_sensor() API, which blocks the slot state check until the RTAS call returns success. To avoid this, fix the PCI hotplug driver (rpaphp) to return an error (-EBUSY) if the slot presence state cannot be detected immediately while the PE is in EEH recovery state. Change rpaphp_get_sensor_state() to invoke rtas_call(get-sensor-state) directly only if the respective PE is in EEH recovery state, and take actions based on the RTAS return status. This way the EEH handler will not be blocked on rpaphp_get_sensor_state() and can immediately notify the driver about the PCI error and stop any active operations.
In normal cases (non-EEH case) rpaphp_get_sensor_state() will continue to invoke rtas_get_sensor() as it was earlier with no change in existing behavior. Signed-off-by: Mahesh Salgaonkar Reviewed-by: Nathan Lynch Acked-by: Bjorn Helgaas Signed-off-by: Michael Ellerman Link: https://msgid.link/169235815601.193557.13989873835811325343.stgit@jupiter --- drivers/pci/hotplug/rpaphp_pci.c | 85 ++++++++++++++++++++++++++++++-- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/drivers/pci/hotplug/rpaphp_pci.c b/drivers/pci/hotplug/rpaphp_pci.c index 630f77057c23d..bcfd26ec6d30a 100644 --- a/drivers/pci/hotplug/rpaphp_pci.c +++ b/drivers/pci/hotplug/rpaphp_pci.c @@ -19,12 +19,92 @@ #include "../pci.h" /* for pci_add_new_bus */ #include "rpaphp.h" +/* + * RTAS call get-sensor-state(DR_ENTITY_SENSE) return values as per PAPR: + * -- generic return codes --- + * -1: Hardware Error + * -2: RTAS_BUSY + * -3: Invalid sensor. RTAS Parameter Error. + * -- rtas_get_sensor function specific return codes --- + * -9000: Need DR entity to be powered up and unisolated before RTAS call + * -9001: Need DR entity to be powered up, but not unisolated, before RTAS call + * -9002: DR entity unusable + * 990x: Extended delay - where x is a number in the range of 0-5 + */ +#define RTAS_SLOT_UNISOLATED -9000 +#define RTAS_SLOT_NOT_UNISOLATED -9001 +#define RTAS_SLOT_NOT_USABLE -9002 + +static int rtas_get_sensor_errno(int rtas_rc) +{ + switch (rtas_rc) { + case 0: + /* Success case */ + return 0; + case RTAS_SLOT_UNISOLATED: + case RTAS_SLOT_NOT_UNISOLATED: + return -EFAULT; + case RTAS_SLOT_NOT_USABLE: + return -ENODEV; + case RTAS_BUSY: + case RTAS_EXTENDED_DELAY_MIN...RTAS_EXTENDED_DELAY_MAX: + return -EBUSY; + default: + return rtas_error_rc(rtas_rc); + } +} + +/* + * get_adapter_status() can be called by the EEH handler during EEH recovery. + * On certain PHB failures, the RTAS call rtas_call(get-sensor-state) returns + * extended busy error (9902) until PHB is recovered by pHyp. 
The RTAS call + * interface rtas_get_sensor() loops over the RTAS call on extended delay + * return code (9902) until the return value is either success (0) or error + * (-1). This causes the EEH handler to get stuck for ~6 seconds before it + * could notify that the PCI error has been detected and stop any active + * operations. This sometimes causes EEH recovery to fail. To avoid this issue, + * invoke rtas_call(get-sensor-state) directly if the respective PE is in EEH + * recovery state and return -EBUSY error based on RTAS return status. This + * will help the EEH handler to notify the driver about the PCI error + * immediately and successfully proceed with EEH recovery steps. + */ + +static int __rpaphp_get_sensor_state(struct slot *slot, int *state) +{ + int rc; + int token = rtas_token("get-sensor-state"); + struct pci_dn *pdn; + struct eeh_pe *pe; + struct pci_controller *phb = PCI_DN(slot->dn)->phb; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + /* + * Fallback to existing method for empty slot or PE isn't in EEH + * recovery. 
+ */ + pdn = list_first_entry_or_null(&PCI_DN(phb->dn)->child_list, + struct pci_dn, list); + if (!pdn) + goto fallback; + + pe = eeh_dev_to_pe(pdn->edev); + if (pe && (pe->state & EEH_PE_RECOVERING)) { + rc = rtas_call(token, 2, 2, state, DR_ENTITY_SENSE, + slot->index); + return rtas_get_sensor_errno(rc); + } +fallback: + return rtas_get_sensor(DR_ENTITY_SENSE, slot->index, state); +} + int rpaphp_get_sensor_state(struct slot *slot, int *state) { int rc; int setlevel; - rc = rtas_get_sensor(DR_ENTITY_SENSE, slot->index, state); + rc = __rpaphp_get_sensor_state(slot, state); if (rc < 0) { if (rc == -EFAULT || rc == -EEXIST) { @@ -40,8 +120,7 @@ int rpaphp_get_sensor_state(struct slot *slot, int *state) dbg("%s: power on slot[%s] failed rc=%d.\n", __func__, slot->name, rc); } else { - rc = rtas_get_sensor(DR_ENTITY_SENSE, - slot->index, state); + rc = __rpaphp_get_sensor_state(slot, state); } } else if (rc == -ENODEV) info("%s: slot is unusable\n", __func__); From 34daf445f82bd3a4df852bb5f1dffd792ac830a0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 18 Aug 2023 10:59:44 +0200 Subject: [PATCH 095/135] powerpc/perf: Convert fsl_emb notifier to state machine callbacks CC arch/powerpc/perf/core-fsl-emb.o arch/powerpc/perf/core-fsl-emb.c:675:6: error: no previous prototype for 'hw_perf_event_setup' [-Werror=missing-prototypes] 675 | void hw_perf_event_setup(int cpu) | ^~~~~~~~~~~~~~~~~~~ Looks like fsl_emb was completely missed by commit 3f6da3905398 ("perf: Rework and fix the arch CPU-hotplug hooks") So, apply same changes as commit 3f6da3905398 ("perf: Rework and fix the arch CPU-hotplug hooks") then commit 57ecde42cc74 ("powerpc/perf: Convert book3s notifier to state machine callbacks") While at it, also fix following error: arch/powerpc/perf/core-fsl-emb.c: In function 'perf_event_interrupt': arch/powerpc/perf/core-fsl-emb.c:648:13: error: variable 'found' set but not used [-Werror=unused-but-set-variable] 648 | int found = 0; | ^~~~~ Fixes: 
3f6da3905398 ("perf: Rework and fix the arch CPU-hotplug hooks") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/603e1facb32608f88f40b7d7b9094adc50e7b2dc.1692349125.git.christophe.leroy@csgroup.eu --- arch/powerpc/perf/core-fsl-emb.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index ee721f420a7ba..1a53ab08447cb 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -645,7 +645,6 @@ static void perf_event_interrupt(struct pt_regs *regs) struct cpu_hw_events *cpuhw = this_cpu_ptr(&cpu_hw_events); struct perf_event *event; unsigned long val; - int found = 0; for (i = 0; i < ppmu->n_counter; ++i) { event = cpuhw->event[i]; @@ -654,7 +653,6 @@ static void perf_event_interrupt(struct pt_regs *regs) if ((int)val < 0) { if (event) { /* event has overflowed */ - found = 1; record_and_restart(event, val, regs); } else { /* @@ -672,11 +670,13 @@ static void perf_event_interrupt(struct pt_regs *regs) isync(); } -void hw_perf_event_setup(int cpu) +static int fsl_emb_pmu_prepare_cpu(unsigned int cpu) { struct cpu_hw_events *cpuhw = &per_cpu(cpu_hw_events, cpu); memset(cpuhw, 0, sizeof(*cpuhw)); + + return 0; } int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) @@ -689,6 +689,8 @@ int register_fsl_emb_pmu(struct fsl_emb_pmu *pmu) pmu->name); perf_pmu_register(&fsl_emb_pmu, "cpu", PERF_TYPE_RAW); + cpuhp_setup_state(CPUHP_PERF_POWER, "perf/powerpc:prepare", + fsl_emb_pmu_prepare_cpu, NULL); return 0; } From f6834c8c59a8e977a6f6e4f96c5d28dfa5db8430 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:19 +0530 Subject: [PATCH 096/135] powerpc/ftrace: Fix dropping weak symbols with older toolchains The minimum level of gcc supported for building the kernel is v5.1. 
v5.x releases of gcc emitted a three instruction sequence for -mprofile-kernel: mflr r0 std r0, 16(r1) bl _mcount It is only with the v6.x releases that gcc started emitting the two instruction sequence for -mprofile-kernel, omitting the second store instruction. With the older three instruction sequence, the actual ftrace location can be the 5th instruction into a function. Update the allowed offset for ftrace location from 12 to 16 to accommodate the same. Cc: stable@vger.kernel.org Fixes: 7af82ff90a2b06 ("powerpc/ftrace: Ignore weak functions") Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/7b265908a9461e38fc756ef9b569703860a80621.1687166935.git.naveen@kernel.org --- arch/powerpc/include/asm/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 91c049d51d0e1..2edc6269b1a35 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -12,7 +12,7 @@ /* Ignore unused weak functions which will have larger offsets */ #ifdef CONFIG_MPROFILE_KERNEL -#define FTRACE_MCOUNT_MAX_OFFSET 12 +#define FTRACE_MCOUNT_MAX_OFFSET 16 #elif defined(CONFIG_PPC32) #define FTRACE_MCOUNT_MAX_OFFSET 8 #endif From 0240605931ec300ddb698020edff05a4c93edbb2 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:20 +0530 Subject: [PATCH 097/135] powerpc/module: Remove unused .ftrace.tramp section .ftrace.tramp section is not used for any purpose. This code was added all the way back in the original commit introducing support for dynamic ftrace on ppc64 modules. Remove it. 
Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/9cf6d7f37ba82f7cb6dafecf660f44925c526d8d.1687166935.git.naveen@kernel.org --- arch/powerpc/include/asm/module.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index ac53606c25943..a8e2e8339fb7f 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -75,10 +75,6 @@ struct mod_arch_specific { #endif #ifdef CONFIG_DYNAMIC_FTRACE -# ifdef MODULE - asm(".section .ftrace.tramp,\"ax\",@nobits; .align 3; .previous"); -# endif /* MODULE */ - int module_trampoline_target(struct module *mod, unsigned long trampoline, unsigned long *target); int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs); From 7f7797b372693ce17223678428490dea2b3e4389 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:21 +0530 Subject: [PATCH 098/135] powerpc64/ftrace: Move ELFv1 and -pg support code into a separate file ELFv1 support is deprecated and on the way out. Pre -mprofile-kernel ftrace support (-pg only) is very limited and is retained primarily for clang builds. It won't be necessary once clang lands support for -fpatchable-function-entry. Copy the existing ftrace code supporting these into ftrace_pg.c. ftrace.c can then be refactored and enhanced with a focus on ppc32 and ppc64 ELFv2. 
Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/1eb6cc6c3141ddb77a2a25f8a9e83d83ff312b02.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/Makefile | 13 +- arch/powerpc/kernel/trace/ftrace.c | 10 - arch/powerpc/kernel/trace/ftrace_pg.c | 846 ++++++++++++++++++++++++++ 3 files changed, 855 insertions(+), 14 deletions(-) create mode 100644 arch/powerpc/kernel/trace/ftrace_pg.c diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index b16a9f9c0b35f..342a2d1ae86cd 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -6,15 +6,16 @@ ifdef CONFIG_FUNCTION_TRACER # do not trace tracer code CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_ftrace_pg.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o +obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o ftrace.o ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o ftrace.o else -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o ftrace_pg.o endif -obj-$(CONFIG_FUNCTION_TRACER) += ftrace_low.o ftrace.o +obj-$(CONFIG_FUNCTION_TRACER) += ftrace_low.o obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_PPC64) += $(obj64-y) @@ -25,3 +26,7 @@ GCOV_PROFILE_ftrace.o := n KCOV_INSTRUMENT_ftrace.o := n KCSAN_SANITIZE_ftrace.o := n UBSAN_SANITIZE_ftrace.o := n +GCOV_PROFILE_ftrace_pg.o := n +KCOV_INSTRUMENT_ftrace_pg.o := n +KCSAN_SANITIZE_ftrace_pg.o := n +UBSAN_SANITIZE_ftrace_pg.o := n diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index a47f303734233..81a121b56c4d7 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -864,13 +864,3 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, } #endif #endif /* 
CONFIG_FUNCTION_GRAPH_TRACER */ - -#ifdef CONFIG_PPC64_ELF_ABI_V1 -char *arch_ftrace_match_adjust(char *str, const char *search) -{ - if (str[0] == '.' && search[0] != '.') - return str + 1; - else - return str; -} -#endif /* CONFIG_PPC64_ELF_ABI_V1 */ diff --git a/arch/powerpc/kernel/trace/ftrace_pg.c b/arch/powerpc/kernel/trace/ftrace_pg.c new file mode 100644 index 0000000000000..7b85c3b460a3c --- /dev/null +++ b/arch/powerpc/kernel/trace/ftrace_pg.c @@ -0,0 +1,846 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Code for replacing ftrace calls with jumps. + * + * Copyright (C) 2007-2008 Steven Rostedt + * + * Thanks goes out to P.A. Semi, Inc for supplying me with a PPC64 box. + * + * Added function graph tracer code, taken from x86 that was written + * by Frederic Weisbecker, and ported to PPC by Steven Rostedt. + * + */ + +#define pr_fmt(fmt) "ftrace-powerpc: " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +/* + * We generally only have a single long_branch tramp and at most 2 or 3 plt + * tramps generated. But, we don't use the plt tramps currently. We also allot + * 2 tramps after .text and .init.text. So, we only end up with around 3 usable + * tramps in total. Set aside 8 just to be sure. + */ +#define NUM_FTRACE_TRAMPS 8 +static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; + +static ppc_inst_t +ftrace_call_replace(unsigned long ip, unsigned long addr, int link) +{ + ppc_inst_t op; + + addr = ppc_function_entry((void *)addr); + + /* if (link) set op to 'bl' else 'b' */ + create_branch(&op, (u32 *)ip, addr, link ? BRANCH_SET_LINK : 0); + + return op; +} + +static inline int +ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) +{ + ppc_inst_t replaced; + + /* + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. 
+ * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. + */ + + /* read the text we want to modify */ + if (copy_inst_from_kernel_nofault(&replaced, (void *)ip)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ + if (!ppc_inst_equal(replaced, old)) { + pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip, + ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old)); + return -EINVAL; + } + + /* replace the text with the new text */ + return patch_instruction((u32 *)ip, new); +} + +/* + * Helper functions that are the same for both PPC64 and PPC32. + */ +static int test_24bit_addr(unsigned long ip, unsigned long addr) +{ + addr = ppc_function_entry((void *)addr); + + return is_offset_in_branch_range(addr - ip); +} + +static int is_bl_op(ppc_inst_t op) +{ + return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0); +} + +static int is_b_op(ppc_inst_t op) +{ + return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0); +} + +static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) +{ + int offset; + + offset = PPC_LI(ppc_inst_val(op)); + /* make it signed */ + if (offset & 0x02000000) + offset |= 0xfe000000; + + return ip + (long)offset; +} + +#ifdef CONFIG_MODULES +static int +__ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long entry, ptr, tramp; + unsigned long ip = rec->ip; + ppc_inst_t op, pop; + + /* read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, op); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed 
to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { + if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { + pr_err("Fetching instruction at %lx failed.\n", ip - 4); + return -EFAULT; + } + + /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && + !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { + pr_err("Unexpected instruction %08lx around bl _mcount\n", + ppc_inst_as_ulong(op)); + return -EINVAL; + } + } else if (IS_ENABLED(CONFIG_PPC64)) { + /* + * Check what is in the next instruction. We can see ld r2,40(r1), but + * on first pass after boot we will see mflr r0. + */ + if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) { + pr_err("Fetching op failed.\n"); + return -EFAULT; + } + + if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { + pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC, + ppc_inst_as_ulong(op)); + return -EINVAL; + } + } + + /* + * When using -mprofile-kernel or PPC32 there is no load to jump over. + * + * Otherwise our original call site looks like: + * + * bl + * ld r2,XX(r1) + * + * Milton Miller pointed out that we can not simply nop the branch. + * If a task was preempted when calling a trace function, the nops + * will remove the way to restore the TOC in r2 and the r2 TOC will + * get corrupted. + * + * Use a b +8 to jump over the load. 
+ */ + if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32)) + pop = ppc_inst(PPC_RAW_NOP()); + else + pop = ppc_inst(PPC_RAW_BRANCH(8)); /* b +8 */ + + if (patch_instruction((u32 *)ip, pop)) { + pr_err("Patching NOP failed.\n"); + return -EPERM; + } + + return 0; +} +#else +static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +static unsigned long find_ftrace_tramp(unsigned long ip) +{ + int i; + + /* + * We have the compiler generated long_branch tramps at the end + * and we prefer those + */ + for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) + if (!ftrace_tramps[i]) + continue; + else if (is_offset_in_branch_range(ftrace_tramps[i] - ip)) + return ftrace_tramps[i]; + + return 0; +} + +static int add_ftrace_tramp(unsigned long tramp) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_tramps[i]) { + ftrace_tramps[i] = tramp; + return 0; + } + + return -1; +} + +/* + * If this is a compiler generated long_branch trampoline (essentially, a + * trampoline that has a branch to _mcount()), we re-write the branch to + * instead go to ftrace_[regs_]caller() and note down the location of this + * trampoline. + */ +static int setup_mcount_compiler_tramp(unsigned long tramp) +{ + int i; + ppc_inst_t op; + unsigned long ptr; + + /* Is this a known long jump tramp? */ + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (ftrace_tramps[i] == tramp) + return 0; + + /* New trampoline -- read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) { + pr_debug("Fetching opcode failed.\n"); + return -1; + } + + /* Is this a 24 bit branch? 
*/ + if (!is_b_op(op)) { + pr_debug("Trampoline is not a long branch tramp.\n"); + return -1; + } + + /* lets find where the pointer goes */ + ptr = find_bl_target(tramp, op); + + if (ptr != ppc_global_function_entry((void *)_mcount)) { + pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr); + return -1; + } + + /* Let's re-write the tramp to go to ftrace_[regs_]caller */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + ptr = ppc_global_function_entry((void *)ftrace_regs_caller); + else + ptr = ppc_global_function_entry((void *)ftrace_caller); + + if (patch_branch((u32 *)tramp, ptr, 0)) { + pr_debug("REL24 out of range!\n"); + return -1; + } + + if (add_ftrace_tramp(tramp)) { + pr_debug("No tramp locations left\n"); + return -1; + } + + return 0; +} + +static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long tramp, ip = rec->ip; + ppc_inst_t op; + + /* Read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); + return -EINVAL; + } + + /* Let's find where the pointer goes */ + tramp = find_bl_target(ip, op); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (setup_mcount_compiler_tramp(tramp)) { + /* Are other trampolines reachable? */ + if (!find_ftrace_tramp(ip)) { + pr_err("No ftrace trampolines reachable from %ps\n", + (void *)ip); + return -EINVAL; + } + } + + if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) { + pr_err("Patching NOP failed.\n"); + return -EPERM; + } + + return 0; +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + ppc_inst_t old, new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. 
+ * Otherwise just update the call site. + */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + old = ftrace_call_replace(ip, addr, 1); + new = ppc_inst(PPC_RAW_NOP()); + return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + return __ftrace_make_nop_kernel(rec, addr); + } else if (!IS_ENABLED(CONFIG_MODULES)) { + return -EINVAL; + } + + /* + * Out of range jumps are called from modules. + * We should either already have a pointer to the module + * or it has been passed in. + */ + if (!rec->arch.mod) { + if (!mod) { + pr_err("No module loaded addr=%lx\n", addr); + return -EFAULT; + } + rec->arch.mod = mod; + } else if (mod) { + if (mod != rec->arch.mod) { + pr_err("Record mod %p not equal to passed in mod %p\n", + rec->arch.mod, mod); + return -EINVAL; + } + /* nothing to do if mod == rec->arch.mod */ + } else + mod = rec->arch.mod; + + return __ftrace_make_nop(mod, rec, addr); +} + +#ifdef CONFIG_MODULES +/* + * Examine the existing instructions for __ftrace_make_call. + * They should effectively be a NOP, and follow formal constraints, + * depending on the ABI. Return false if they don't. 
+ */ +static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) +{ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())); + else + return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) && + ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC)); +} + +static int +__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + ppc_inst_t op[2]; + void *ip = (void *)rec->ip; + unsigned long entry, ptr, tramp; + struct module *mod = rec->arch.mod; + + /* read where this goes */ + if (copy_inst_from_kernel_nofault(op, ip)) + return -EFAULT; + + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && + copy_inst_from_kernel_nofault(op + 1, ip + 4)) + return -EFAULT; + + if (!expected_nop_sequence(ip, op[0], op[1])) { + pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip, + ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1])); + return -EINVAL; + } + + /* If we never set up ftrace trampoline(s), then bail */ + if (!mod->arch.tramp || + (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) { + pr_err("No ftrace trampoline\n"); + return -EINVAL; + } + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else + tramp = mod->arch.tramp; + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; +} +#else +static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + return 0; +} +#endif /* CONFIG_MODULES */ + +static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long 
addr) +{ + ppc_inst_t op; + void *ip = (void *)rec->ip; + unsigned long tramp, entry, ptr; + + /* Make sure we're being asked to patch branch to a known ftrace addr */ + entry = ppc_global_function_entry((void *)ftrace_caller); + ptr = ppc_global_function_entry((void *)addr); + + if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + entry = ppc_global_function_entry((void *)ftrace_regs_caller); + + if (ptr != entry) { + pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr); + return -EINVAL; + } + + /* Make sure we have a nop */ + if (copy_inst_from_kernel_nofault(&op, ip)) { + pr_err("Unable to read ftrace location %p\n", ip); + return -EFAULT; + } + + if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { + pr_err("Unexpected call sequence at %p: %08lx\n", + ip, ppc_inst_as_ulong(op)); + return -EINVAL; + } + + tramp = find_ftrace_tramp((unsigned long)ip); + if (!tramp) { + pr_err("No ftrace trampolines reachable from %ps\n", ip); + return -EINVAL; + } + + if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { + pr_err("Error patching branch to ftrace tramp!\n"); + return -EINVAL; + } + + return 0; +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + ppc_inst_t old, new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. + */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + old = ppc_inst(PPC_RAW_NOP()); + new = ftrace_call_replace(ip, addr, 1); + return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + return __ftrace_make_call_kernel(rec, addr); + } else if (!IS_ENABLED(CONFIG_MODULES)) { + /* We should not get here without modules */ + return -EINVAL; + } + + /* + * Out of range jumps are called from modules. + * Being that we are converting from nop, it had better + * already have a module defined. 
+ */ + if (!rec->arch.mod) { + pr_err("No module loaded\n"); + return -EINVAL; + } + + return __ftrace_make_call(rec, addr); +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#ifdef CONFIG_MODULES +static int +__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + ppc_inst_t op; + unsigned long ip = rec->ip; + unsigned long entry, ptr, tramp; + struct module *mod = rec->arch.mod; + + /* If we never set up ftrace trampolines, then bail */ + if (!mod->arch.tramp || !mod->arch.tramp_regs) { + pr_err("No ftrace trampoline\n"); + return -EINVAL; + } + + /* read where this goes */ + if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { + pr_err("Fetching opcode failed.\n"); + return -EFAULT; + } + + /* Make sure that this is still a 24bit jump */ + if (!is_bl_op(op)) { + pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); + return -EINVAL; + } + + /* lets find where the pointer goes */ + tramp = find_bl_target(ip, op); + entry = ppc_global_function_entry((void *)old_addr); + + pr_devel("ip:%lx jumps to %lx", ip, tramp); + + if (tramp != entry) { + /* old_addr is not within range, so we must have used a trampoline */ + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", ptr); + + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + } + + /* The new target may be within range */ + if (test_24bit_addr(ip, addr)) { + /* within range */ + if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; + } + + if (rec->flags & FTRACE_FL_REGS) + tramp = mod->arch.tramp_regs; + else + tramp = mod->arch.tramp; + + if (module_trampoline_target(mod, tramp, &ptr)) { + pr_err("Failed to get trampoline target\n"); + return -EFAULT; + } + + pr_devel("trampoline target %lx", 
ptr); + + entry = ppc_global_function_entry((void *)addr); + /* This should match what was called */ + if (ptr != entry) { + pr_err("addr %lx does not match expected %lx\n", ptr, entry); + return -EINVAL; + } + + if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) { + pr_err("REL24 out of range!\n"); + return -EINVAL; + } + + return 0; +} +#else +static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) +{ + return 0; +} +#endif + +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned long ip = rec->ip; + ppc_inst_t old, new; + + /* + * If the calling address is more that 24 bits away, + * then we had to use a trampoline to make the call. + * Otherwise just update the call site. + */ + if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) { + /* within range */ + old = ftrace_call_replace(ip, old_addr, 1); + new = ftrace_call_replace(ip, addr, 1); + return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + /* + * We always patch out of range locations to go to the regs + * variant, so there is nothing to do here + */ + return 0; + } else if (!IS_ENABLED(CONFIG_MODULES)) { + /* We should not get here without modules */ + return -EINVAL; + } + + /* + * Out of range jumps are called from modules. 
+ */ + if (!rec->arch.mod) { + pr_err("No module loaded\n"); + return -EINVAL; + } + + return __ftrace_modify_call(rec, old_addr, addr); +} +#endif + +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip = (unsigned long)(&ftrace_call); + ppc_inst_t old, new; + int ret; + + old = ppc_inst_read((u32 *)&ftrace_call); + new = ftrace_call_replace(ip, (unsigned long)func, 1); + ret = ftrace_modify_code(ip, old, new); + + /* Also update the regs callback function */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) { + ip = (unsigned long)(&ftrace_regs_call); + old = ppc_inst_read((u32 *)&ftrace_regs_call); + new = ftrace_call_replace(ip, (unsigned long)func, 1); + ret = ftrace_modify_code(ip, old, new); + } + + return ret; +} + +/* + * Use the default ftrace_modify_all_code, but without + * stop_machine(). + */ +void arch_ftrace_update_code(int command) +{ + ftrace_modify_all_code(command); +} + +#ifdef CONFIG_PPC64 +#define PACATOC offsetof(struct paca_struct, kernel_toc) + +extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; + +void ftrace_free_init_tramp(void) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS && ftrace_tramps[i]; i++) + if (ftrace_tramps[i] == (unsigned long)ftrace_tramp_init) { + ftrace_tramps[i] = 0; + return; + } +} + +int __init ftrace_dyn_arch_init(void) +{ + int i; + unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init }; + u32 stub_insns[] = { + PPC_RAW_LD(_R12, _R13, PACATOC), + PPC_RAW_ADDIS(_R12, _R12, 0), + PPC_RAW_ADDI(_R12, _R12, 0), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR() + }; + unsigned long addr; + long reladdr; + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + addr = ppc_global_function_entry((void *)ftrace_regs_caller); + else + addr = ppc_global_function_entry((void *)ftrace_caller); + + reladdr = addr - kernel_toc_addr(); + + if (reladdr >= SZ_2G || reladdr < -(long)SZ_2G) { + pr_err("Address of %ps out of range of kernel_toc.\n", + (void *)addr); + return -1; + } + + for (i = 
0; i < 2; i++) { + memcpy(tramp[i], stub_insns, sizeof(stub_insns)); + tramp[i][1] |= PPC_HA(reladdr); + tramp[i][2] |= PPC_LO(reladdr); + add_ftrace_tramp((unsigned long)tramp[i]); + } + + return 0; +} +#endif + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + +extern void ftrace_graph_call(void); +extern void ftrace_graph_stub(void); + +static int ftrace_modify_ftrace_graph_caller(bool enable) +{ + unsigned long ip = (unsigned long)(&ftrace_graph_call); + unsigned long addr = (unsigned long)(&ftrace_graph_caller); + unsigned long stub = (unsigned long)(&ftrace_graph_stub); + ppc_inst_t old, new; + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) + return 0; + + old = ftrace_call_replace(ip, enable ? stub : addr, 0); + new = ftrace_call_replace(ip, enable ? addr : stub, 0); + + return ftrace_modify_code(ip, old, new); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_ftrace_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_ftrace_graph_caller(false); +} + +/* + * Hook the return address and push it in the stack of return addrs + * in current thread info. Return the address we want to divert to. 
+ */ +static unsigned long +__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp) +{ + unsigned long return_hooker; + int bit; + + if (unlikely(ftrace_graph_is_dead())) + goto out; + + if (unlikely(atomic_read(&current->tracing_graph_pause))) + goto out; + + bit = ftrace_test_recursion_trylock(ip, parent); + if (bit < 0) + goto out; + + return_hooker = ppc_function_entry(return_to_handler); + + if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp)) + parent = return_hooker; + + ftrace_test_recursion_unlock(bit); +out: + return parent; +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) +{ + fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); +} +#else +unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, + unsigned long sp) +{ + return __prepare_ftrace_return(parent, ip, sp); +} +#endif +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +#ifdef CONFIG_PPC64_ELF_ABI_V1 +char *arch_ftrace_match_adjust(char *str, const char *search) +{ + if (str[0] == '.' 
&& search[0] != '.') + return str + 1; + else + return str; +} +#endif /* CONFIG_PPC64_ELF_ABI_V1 */ From 96d7a13610abcf6bff9d0d0e195c6d2650310125 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:22 +0530 Subject: [PATCH 099/135] powerpc/ftrace: Simplify function_graph support in ftrace.c Since we now support DYNAMIC_FTRACE_WITH_ARGS across ppc32 and ppc64 ELFv2, we can simplify function_graph tracer support code in ftrace.c Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/4dc92c4b1ed444dc62b748ae7327acdb9e096864.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace.c | 64 ++++-------------------------- 1 file changed, 7 insertions(+), 57 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 81a121b56c4d7..f117124c30325 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -790,44 +790,10 @@ int __init ftrace_dyn_arch_init(void) #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER - -extern void ftrace_graph_call(void); -extern void ftrace_graph_stub(void); - -static int ftrace_modify_ftrace_graph_caller(bool enable) -{ - unsigned long ip = (unsigned long)(&ftrace_graph_call); - unsigned long addr = (unsigned long)(&ftrace_graph_caller); - unsigned long stub = (unsigned long)(&ftrace_graph_stub); - ppc_inst_t old, new; - - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS)) - return 0; - - old = ftrace_call_replace(ip, enable ? stub : addr, 0); - new = ftrace_call_replace(ip, enable ? addr : stub, 0); - - return ftrace_modify_code(ip, old, new); -} - -int ftrace_enable_ftrace_graph_caller(void) -{ - return ftrace_modify_ftrace_graph_caller(true); -} - -int ftrace_disable_ftrace_graph_caller(void) -{ - return ftrace_modify_ftrace_graph_caller(false); -} - -/* - * Hook the return address and push it in the stack of return addrs - * in current thread info. 
Return the address we want to divert to. - */ -static unsigned long -__prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp) +void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, + struct ftrace_ops *op, struct ftrace_regs *fregs) { - unsigned long return_hooker; + unsigned long sp = fregs->regs.gpr[1]; int bit; if (unlikely(ftrace_graph_is_dead())) @@ -836,31 +802,15 @@ __prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp if (unlikely(atomic_read(&current->tracing_graph_pause))) goto out; - bit = ftrace_test_recursion_trylock(ip, parent); + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) goto out; - return_hooker = ppc_function_entry(return_to_handler); - - if (!function_graph_enter(parent, ip, 0, (unsigned long *)sp)) - parent = return_hooker; + if (!function_graph_enter(parent_ip, ip, 0, (unsigned long *)sp)) + parent_ip = ppc_function_entry(return_to_handler); ftrace_test_recursion_unlock(bit); out: - return parent; + fregs->regs.link = parent_ip; } - -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS -void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *op, struct ftrace_regs *fregs) -{ - fregs->regs.link = __prepare_ftrace_return(parent_ip, ip, fregs->regs.gpr[1]); -} -#else -unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, - unsigned long sp) -{ - return __prepare_ftrace_return(parent, ip, sp); -} -#endif #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ From b5efb61c70f8ba9b1e168185530b9c7342184a4c Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:23 +0530 Subject: [PATCH 100/135] powerpc/ftrace: Use FTRACE_REGS_ADDR to identify the correct ftrace trampoline Instead of keying off DYNAMIC_FTRACE_WITH_REGS, use FTRACE_REGS_ADDR to identify the proper ftrace trampoline address to use. 
Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/6045a280a57a7ea937a5bb13ccac747026dbfb07.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index f117124c30325..5aa36272617a0 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -745,14 +745,9 @@ int __init ftrace_dyn_arch_init(void) }; #endif - unsigned long addr; + unsigned long addr = FTRACE_REGS_ADDR; long reladdr; - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - addr = ppc_global_function_entry((void *)ftrace_regs_caller); - else - addr = ppc_global_function_entry((void *)ftrace_caller); - if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { for (i = 0; i < 2; i++) { reladdr = addr - (unsigned long)tramp[i]; From f3993a0330e2d11e42c095810c6c33084024df46 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:24 +0530 Subject: [PATCH 101/135] powerpc/ftrace: Extend ftrace support for large kernels to ppc32 Commit 67361cf8071286 ("powerpc/ftrace: Handle large kernel configs") added ftrace support for ppc64 kernel images with a text section larger than 32MB. The approach itself isn't specific to ppc64, so extend the same to also work on ppc32. While at it, reduce the space reserved for the stub from 64 bytes to 32 bytes since the different stub variants are all less than 8 instructions. To reduce use of #ifdef, a stub implementation is provided for kernel_toc_addr() and -SZ_2G is cast to 'long long' to prevent errors on ppc32. 
Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/9fa3258cbb9105cf8a0a8135214d44ffbc75fe84.1687166935.git.naveen@kernel.org --- arch/powerpc/include/asm/ftrace.h | 10 +++++-- arch/powerpc/include/asm/sections.h | 2 ++ arch/powerpc/kernel/trace/ftrace.c | 39 ++++++++++++++------------ arch/powerpc/kernel/trace/ftrace_low.S | 6 ++-- arch/powerpc/kernel/vmlinux.lds.S | 4 --- 5 files changed, 32 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 2edc6269b1a35..702aaf2efa966 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -124,15 +124,19 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return get_paca()->ftrace_enabled; } - -void ftrace_free_init_tramp(void); #else /* CONFIG_PPC64 */ static inline void this_cpu_disable_ftrace(void) { } static inline void this_cpu_enable_ftrace(void) { } static inline void this_cpu_set_ftrace_enabled(u8 ftrace_enabled) { } static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } -static inline void ftrace_free_init_tramp(void) { } #endif /* CONFIG_PPC64 */ + +#ifdef CONFIG_FUNCTION_TRACER +extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; +void ftrace_free_init_tramp(void); +#else +static inline void ftrace_free_init_tramp(void) { } +#endif #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_FTRACE */ diff --git a/arch/powerpc/include/asm/sections.h b/arch/powerpc/include/asm/sections.h index 4e1f548c8d373..ea26665f82cfc 100644 --- a/arch/powerpc/include/asm/sections.h +++ b/arch/powerpc/include/asm/sections.h @@ -74,6 +74,8 @@ static inline int overlaps_kernel_text(unsigned long start, unsigned long end) (unsigned long)_stext < end; } +#else +static inline unsigned long kernel_toc_addr(void) { BUILD_BUG(); return -1UL; } #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c 
index 5aa36272617a0..913c7aa63d3fa 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -707,11 +707,6 @@ void arch_ftrace_update_code(int command) ftrace_modify_all_code(command); } -#ifdef CONFIG_PPC64 -#define PACATOC offsetof(struct paca_struct, kernel_toc) - -extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; - void ftrace_free_init_tramp(void) { int i; @@ -725,28 +720,30 @@ void ftrace_free_init_tramp(void) int __init ftrace_dyn_arch_init(void) { - int i; unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init }; -#ifdef CONFIG_PPC_KERNEL_PCREL + unsigned long addr = FTRACE_REGS_ADDR; + long reladdr; + int i; u32 stub_insns[] = { +#ifdef CONFIG_PPC_KERNEL_PCREL /* pla r12,addr */ PPC_PREFIX_MLS | __PPC_PRFX_R(1), PPC_INST_PADDI | ___PPC_RT(_R12), PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR() - }; -#else - u32 stub_insns[] = { - PPC_RAW_LD(_R12, _R13, PACATOC), +#elif defined(CONFIG_PPC64) + PPC_RAW_LD(_R12, _R13, offsetof(struct paca_struct, kernel_toc)), PPC_RAW_ADDIS(_R12, _R12, 0), PPC_RAW_ADDI(_R12, _R12, 0), PPC_RAW_MTCTR(_R12), PPC_RAW_BCTR() - }; +#else + PPC_RAW_LIS(_R12, 0), + PPC_RAW_ADDI(_R12, _R12, 0), + PPC_RAW_MTCTR(_R12), + PPC_RAW_BCTR() #endif - - unsigned long addr = FTRACE_REGS_ADDR; - long reladdr; + }; if (IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { for (i = 0; i < 2; i++) { @@ -763,10 +760,10 @@ int __init ftrace_dyn_arch_init(void) tramp[i][1] |= IMM_L(reladdr); add_ftrace_tramp((unsigned long)tramp[i]); } - } else { + } else if (IS_ENABLED(CONFIG_PPC64)) { reladdr = addr - kernel_toc_addr(); - if (reladdr >= (long)SZ_2G || reladdr < -(long)SZ_2G) { + if (reladdr >= (long)SZ_2G || reladdr < -(long long)SZ_2G) { pr_err("Address of %ps out of range of kernel_toc.\n", (void *)addr); return -1; @@ -778,11 +775,17 @@ int __init ftrace_dyn_arch_init(void) tramp[i][2] |= PPC_LO(reladdr); add_ftrace_tramp((unsigned long)tramp[i]); } + } else { + for (i = 0; i < 2; i++) { + memcpy(tramp[i], stub_insns, 
sizeof(stub_insns)); + tramp[i][0] |= PPC_HA(addr); + tramp[i][1] |= PPC_LO(addr); + add_ftrace_tramp((unsigned long)tramp[i]); + } } return 0; } -#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER void ftrace_graph_func(unsigned long ip, unsigned long parent_ip, diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_low.S index 5e271f87f7990..3c66f19d17d09 100644 --- a/arch/powerpc/kernel/trace/ftrace_low.S +++ b/arch/powerpc/kernel/trace/ftrace_low.S @@ -10,19 +10,17 @@ #include #include -#ifdef CONFIG_PPC64 .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: - .space 64 + .space 32 .popsection .pushsection ".tramp.ftrace.init","aw",@progbits; .globl ftrace_tramp_init ftrace_tramp_init: - .space 64 + .space 32 .popsection -#endif _GLOBAL(mcount) _GLOBAL(_mcount) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 13614f0b269cf..1c5970df32336 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -107,9 +107,7 @@ SECTIONS #endif /* careful! __ftr_alt_* sections need to be close to .text */ *(.text.hot .text.hot.* TEXT_MAIN .text.fixup .text.unlikely .text.unlikely.* .fixup __ftr_alt_* .ref.text); -#ifdef CONFIG_PPC64 *(.tramp.ftrace.text); -#endif NOINSTR_TEXT SCHED_TEXT LOCK_TEXT @@ -276,9 +274,7 @@ SECTIONS */ . = ALIGN(PAGE_SIZE); _einittext = .; -#ifdef CONFIG_PPC64 *(.tramp.ftrace.init); -#endif } :text /* .exit.text is discarded at runtime, not link time, From bad90aa52d9a0141c41e00ccd4c40be30a29acc6 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:25 +0530 Subject: [PATCH 102/135] powerpc/ftrace: Consolidate ftrace support into fewer files ftrace_low.S has just the _mcount stub and return_to_handler(). Merge this back into ftrace_mprofile.S and ftrace_64_pg.S to keep all ftrace code together, and to allow those to evolve independently. 
ftrace_mprofile.S is also not an entirely accurate name since this also holds ppc32 code. This will be all the more incorrect once support for -fpatchable-function-entry is added. Rename files here to more accurately describe the code: - ftrace_mprofile.S is renamed to ftrace_entry.S - ftrace_pg.c is renamed to ftrace_64_pg.c - ftrace_64_pg.S is renamed to ftrace_64_pg_entry.S Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/b900c9a8bba9d6c3c295e0f99886acf3e5bf6f7b.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/Makefile | 17 +++-- arch/powerpc/kernel/trace/ftrace_64_pg.S | 66 ------------------- .../trace/{ftrace_pg.c => ftrace_64_pg.c} | 0 .../{ftrace_low.S => ftrace_64_pg_entry.S} | 58 +++++++++++++++- .../{ftrace_mprofile.S => ftrace_entry.S} | 66 +++++++++++++++++++ 5 files changed, 131 insertions(+), 76 deletions(-) delete mode 100644 arch/powerpc/kernel/trace/ftrace_64_pg.S rename arch/powerpc/kernel/trace/{ftrace_pg.c => ftrace_64_pg.c} (100%) rename arch/powerpc/kernel/trace/{ftrace_low.S => ftrace_64_pg_entry.S} (55%) rename arch/powerpc/kernel/trace/{ftrace_mprofile.S => ftrace_entry.S} (83%) diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index 342a2d1ae86cd..125f4ca588b98 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -6,16 +6,15 @@ ifdef CONFIG_FUNCTION_TRACER # do not trace tracer code CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_ftrace_pg.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o ftrace.o +obj32-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_mprofile.o ftrace.o +obj64-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o else -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o ftrace_pg.o 
+obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o ftrace_64_pg_entry.o endif -obj-$(CONFIG_FUNCTION_TRACER) += ftrace_low.o obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_PPC64) += $(obj64-y) @@ -26,7 +25,7 @@ GCOV_PROFILE_ftrace.o := n KCOV_INSTRUMENT_ftrace.o := n KCSAN_SANITIZE_ftrace.o := n UBSAN_SANITIZE_ftrace.o := n -GCOV_PROFILE_ftrace_pg.o := n -KCOV_INSTRUMENT_ftrace_pg.o := n -KCSAN_SANITIZE_ftrace_pg.o := n -UBSAN_SANITIZE_ftrace_pg.o := n +GCOV_PROFILE_ftrace_64_pg.o := n +KCOV_INSTRUMENT_ftrace_64_pg.o := n +KCSAN_SANITIZE_ftrace_64_pg.o := n +UBSAN_SANITIZE_ftrace_64_pg.o := n diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.S b/arch/powerpc/kernel/trace/ftrace_64_pg.S deleted file mode 100644 index cdbcb5a0783b3..0000000000000 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.S +++ /dev/null @@ -1,66 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Split from ftrace_64.S - */ - -#include -#include -#include -#include -#include - -_GLOBAL_TOC(ftrace_caller) - lbz r3, PACA_FTRACE_ENABLED(r13) - cmpdi r3, 0 - beqlr - - /* Taken from output of objdump from lib64/glibc */ - mflr r3 - ld r11, 0(r1) - stdu r1, -112(r1) - std r3, 128(r1) - ld r4, 16(r11) - subi r3, r3, MCOUNT_INSN_SIZE -.globl ftrace_call -ftrace_call: - bl ftrace_stub - nop -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -.globl ftrace_graph_call -ftrace_graph_call: - b ftrace_graph_stub -_GLOBAL(ftrace_graph_stub) -#endif - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 - -_GLOBAL(ftrace_stub) - blr - -#ifdef CONFIG_FUNCTION_GRAPH_TRACER -_GLOBAL(ftrace_graph_caller) - addi r5, r1, 112 - /* load r4 with local address */ - ld r4, 128(r1) - subi r4, r4, MCOUNT_INSN_SIZE - - /* Grab the LR out of the caller stack frame */ - ld r11, 112(r1) - ld r3, 16(r11) - - bl prepare_ftrace_return - nop - - /* - * prepare_ftrace_return gives us the address we divert to. - * Change the LR in the callers stack frame to this. 
- */ - ld r11, 112(r1) - std r3, 16(r11) - - ld r0, 128(r1) - mtlr r0 - addi r1, r1, 112 - blr -#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/powerpc/kernel/trace/ftrace_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c similarity index 100% rename from arch/powerpc/kernel/trace/ftrace_pg.c rename to arch/powerpc/kernel/trace/ftrace_64_pg.c diff --git a/arch/powerpc/kernel/trace/ftrace_low.S b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S similarity index 55% rename from arch/powerpc/kernel/trace/ftrace_low.S rename to arch/powerpc/kernel/trace/ftrace_64_pg_entry.S index 3c66f19d17d09..a8a7f28404c8f 100644 --- a/arch/powerpc/kernel/trace/ftrace_low.S +++ b/arch/powerpc/kernel/trace/ftrace_64_pg_entry.S @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0-or-later */ /* - * Split from entry_64.S + * Split from ftrace_64.S */ #include @@ -10,6 +10,62 @@ #include #include +_GLOBAL_TOC(ftrace_caller) + lbz r3, PACA_FTRACE_ENABLED(r13) + cmpdi r3, 0 + beqlr + + /* Taken from output of objdump from lib64/glibc */ + mflr r3 + ld r11, 0(r1) + stdu r1, -112(r1) + std r3, 128(r1) + ld r4, 16(r11) + subi r3, r3, MCOUNT_INSN_SIZE +.globl ftrace_call +ftrace_call: + bl ftrace_stub + nop +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +.globl ftrace_graph_call +ftrace_graph_call: + b ftrace_graph_stub +_GLOBAL(ftrace_graph_stub) +#endif + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + +_GLOBAL(ftrace_stub) + blr + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(ftrace_graph_caller) + addi r5, r1, 112 + /* load r4 with local address */ + ld r4, 128(r1) + subi r4, r4, MCOUNT_INSN_SIZE + + /* Grab the LR out of the caller stack frame */ + ld r11, 112(r1) + ld r3, 16(r11) + + bl prepare_ftrace_return + nop + + /* + * prepare_ftrace_return gives us the address we divert to. + * Change the LR in the callers stack frame to this. 
+ */ + ld r11, 112(r1) + std r3, 16(r11) + + ld r0, 128(r1) + mtlr r0 + addi r1, r1, 112 + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_entry.S similarity index 83% rename from arch/powerpc/kernel/trace/ftrace_mprofile.S rename to arch/powerpc/kernel/trace/ftrace_entry.S index 15fc75ffff32e..4e7103c316fb0 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -3,6 +3,7 @@ * Split from ftrace_64.S */ +#include #include #include #include @@ -248,3 +249,68 @@ livepatch_handler: /* Return to original caller of live patched function */ blr #endif /* CONFIG_LIVEPATCH */ + +_GLOBAL(mcount) +_GLOBAL(_mcount) +EXPORT_SYMBOL(_mcount) + mflr r12 + mtctr r12 + mtlr r0 + bctr + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +_GLOBAL(return_to_handler) + /* need to save return values */ +#ifdef CONFIG_PPC64 + std r4, -32(r1) + std r3, -24(r1) + /* save TOC */ + std r2, -16(r1) + std r31, -8(r1) + mr r31, r1 + stdu r1, -112(r1) + + /* + * We might be called from a module. + * Switch to our TOC to run inside the core kernel. 
+ */ + LOAD_PACA_TOC() +#else + stwu r1, -16(r1) + stw r3, 8(r1) + stw r4, 12(r1) +#endif + + bl ftrace_return_to_handler + nop + + /* return value has real return address */ + mtlr r3 + +#ifdef CONFIG_PPC64 + ld r1, 0(r1) + ld r4, -32(r1) + ld r3, -24(r1) + ld r2, -16(r1) + ld r31, -8(r1) +#else + lwz r3, 8(r1) + lwz r4, 12(r1) + addi r1, r1, 16 +#endif + + /* Jump back to real return address */ + blr +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + +.pushsection ".tramp.ftrace.text","aw",@progbits; +.globl ftrace_tramp_text +ftrace_tramp_text: + .space 32 +.popsection + +.pushsection ".tramp.ftrace.init","aw",@progbits; +.globl ftrace_tramp_init +ftrace_tramp_init: + .space 32 +.popsection From f4fcbf2e093e25a7faa8a3c2a5097524114e9547 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:26 +0530 Subject: [PATCH 103/135] powerpc/ftrace: Refactor ftrace_modify_code() Split up ftrace_modify_code() into a few helpers for future use. Also update error messages accordingly. Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/a8daa49712b44ff539e6c22a2ea649a540386798.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace.c | 47 +++++++++++++++++------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 913c7aa63d3fa..ef4e49c2c3778 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -50,32 +50,39 @@ ftrace_call_replace(unsigned long ip, unsigned long addr, int link) return op; } -static inline int -ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) +static inline int ftrace_read_inst(unsigned long ip, ppc_inst_t *op) { - ppc_inst_t replaced; + if (copy_inst_from_kernel_nofault(op, (void *)ip)) { + pr_err("0x%lx: fetching instruction failed\n", ip); + return -EFAULT; + } - /* - * Note: - * We are paranoid about modifying text, as 
if a bug was to happen, it - * could cause us to read or write to someplace that could cause harm. - * Carefully read and modify the code with probe_kernel_*(), and make - * sure what we read is what we expected it to be before modifying it. - */ + return 0; +} - /* read the text we want to modify */ - if (copy_inst_from_kernel_nofault(&replaced, (void *)ip)) - return -EFAULT; +static inline int ftrace_validate_inst(unsigned long ip, ppc_inst_t inst) +{ + ppc_inst_t op; + int ret; - /* Make sure it is what we expect it to be */ - if (!ppc_inst_equal(replaced, old)) { - pr_err("%p: replaced (%08lx) != old (%08lx)", (void *)ip, - ppc_inst_as_ulong(replaced), ppc_inst_as_ulong(old)); - return -EINVAL; + ret = ftrace_read_inst(ip, &op); + if (!ret && !ppc_inst_equal(op, inst)) { + pr_err("0x%lx: expected (%08lx) != found (%08lx)\n", + ip, ppc_inst_as_ulong(inst), ppc_inst_as_ulong(op)); + ret = -EINVAL; } - /* replace the text with the new text */ - return patch_instruction((u32 *)ip, new); + return ret; +} + +static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_t new) +{ + int ret = ftrace_validate_inst(ip, old); + + if (!ret) + ret = patch_instruction((u32 *)ip, new); + + return ret; } /* From 33bb8a0be9c826fce545ae390ecaf91e96b5db43 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:27 +0530 Subject: [PATCH 104/135] powerpc/ftrace: Stop re-purposing linker generated long branches for ftrace Commit 67361cf8071286 ("powerpc/ftrace: Handle large kernel configs") added ftrace support for ppc64 kernel images with a text section larger than 32MB. The patch did two things: 1. Add stubs at the end of .text to branch into ftrace_[regs_]caller for functions that were out of branch range. 2. Re-purpose linker-generated long branches to _mcount to instead branch to ftrace_[regs_]caller. Before that, we only supported kernel .text up to ~32MB. 
With the above, we now support up to ~96MB: - The first 32MB of kernel text can branch directly into ftrace_[regs_]caller since that symbol is usually at the beginning. - The modified long_branch from (2) above is used by the next 32MB of kernel text. - The next 32MB of kernel text can use the stub at the end of text to branch back to ftrace_[regs_]caller. While re-purposing the long branch works in practice, it still restricts ftrace to kernel text up to ~96MB. The stub at the end of kernel text from (1) already enables us to extend ftrace support for kernel text up to 64MB, which fulfils the original requirement. Further, once we switch to -fpatchable-function-entry, there will not be a long branch that we can use. Stop re-purposing the linker-generated long branches for ftrace to simplify the code. If there are good reasons to support ftrace on kernels beyond 64MB, we can consider adding support by using -fpatchable-function-entry. Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/33fa3be97f8e1f2171254ef2e1b0d5c8836c11fd.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace.c | 110 +++++------------------------ 1 file changed, 17 insertions(+), 93 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index ef4e49c2c3778..278bf8e52b6e8 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -28,13 +28,7 @@ #include #include -/* - * We generally only have a single long_branch tramp and at most 2 or 3 plt - * tramps generated. But, we don't use the plt tramps currently. We also allot - * 2 tramps after .text and .init.text. So, we only end up with around 3 usable - * tramps in total. Set aside 8 just to be sure. 
- */ -#define NUM_FTRACE_TRAMPS 8 +#define NUM_FTRACE_TRAMPS 2 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; static ppc_inst_t @@ -100,11 +94,6 @@ static int is_bl_op(ppc_inst_t op) return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0); } -static int is_b_op(ppc_inst_t op) -{ - return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BRANCH(0); -} - static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) { int offset; @@ -227,11 +216,7 @@ static unsigned long find_ftrace_tramp(unsigned long ip) { int i; - /* - * We have the compiler generated long_branch tramps at the end - * and we prefer those - */ - for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--) + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) if (!ftrace_tramps[i]) continue; else if (is_offset_in_branch_range(ftrace_tramps[i] - ip)) @@ -240,75 +225,6 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } -static int add_ftrace_tramp(unsigned long tramp) -{ - int i; - - for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (!ftrace_tramps[i]) { - ftrace_tramps[i] = tramp; - return 0; - } - - return -1; -} - -/* - * If this is a compiler generated long_branch trampoline (essentially, a - * trampoline that has a branch to _mcount()), we re-write the branch to - * instead go to ftrace_[regs_]caller() and note down the location of this - * trampoline. - */ -static int setup_mcount_compiler_tramp(unsigned long tramp) -{ - int i; - ppc_inst_t op; - unsigned long ptr; - - /* Is this a known long jump tramp? */ - for (i = 0; i < NUM_FTRACE_TRAMPS; i++) - if (ftrace_tramps[i] == tramp) - return 0; - - /* New trampoline -- read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)tramp)) { - pr_debug("Fetching opcode failed.\n"); - return -1; - } - - /* Is this a 24 bit branch? 
*/ - if (!is_b_op(op)) { - pr_debug("Trampoline is not a long branch tramp.\n"); - return -1; - } - - /* lets find where the pointer goes */ - ptr = find_bl_target(tramp, op); - - if (ptr != ppc_global_function_entry((void *)_mcount)) { - pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr); - return -1; - } - - /* Let's re-write the tramp to go to ftrace_[regs_]caller */ - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - ptr = ppc_global_function_entry((void *)ftrace_regs_caller); - else - ptr = ppc_global_function_entry((void *)ftrace_caller); - - if (patch_branch((u32 *)tramp, ptr, 0)) { - pr_debug("REL24 out of range!\n"); - return -1; - } - - if (add_ftrace_tramp(tramp)) { - pr_debug("No tramp locations left\n"); - return -1; - } - - return 0; -} - static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) { unsigned long tramp, ip = rec->ip; @@ -331,13 +247,10 @@ static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) pr_devel("ip:%lx jumps to %lx", ip, tramp); - if (setup_mcount_compiler_tramp(tramp)) { - /* Are other trampolines reachable? */ - if (!find_ftrace_tramp(ip)) { - pr_err("No ftrace trampolines reachable from %ps\n", - (void *)ip); - return -EINVAL; - } + /* Are ftrace trampolines reachable? 
*/ + if (!find_ftrace_tramp(ip)) { + pr_err("No ftrace trampolines reachable from %ps\n", (void *)ip); + return -EINVAL; } if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) { @@ -725,6 +638,17 @@ void ftrace_free_init_tramp(void) } } +static void __init add_ftrace_tramp(unsigned long tramp) +{ + int i; + + for (i = 0; i < NUM_FTRACE_TRAMPS; i++) + if (!ftrace_tramps[i]) { + ftrace_tramps[i] = tramp; + return; + } +} + int __init ftrace_dyn_arch_init(void) { unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init }; From cc93b9233230312a8a905fabd590c405d60f9edd Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:28 +0530 Subject: [PATCH 105/135] powerpc/ftrace: Add separate ftrace_init_nop() with additional validation Currently, we validate instructions around the ftrace location every time we have to enable/disable ftrace. Introduce ftrace_init_nop() to instead perform all the validation during ftrace initialization. This allows us to simply patch the necessary instructions during enabling/disabling ftrace. 
Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/f373684081e8e98be09b7f44d2d93069768324dc.1687166935.git.naveen@kernel.org --- arch/powerpc/include/asm/ftrace.h | 6 +++ arch/powerpc/kernel/trace/ftrace.c | 71 ++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 702aaf2efa966..ef9f0b97670d1 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -29,11 +29,17 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, unsigned long sp); +struct module; +struct dyn_ftrace; struct dyn_arch_ftrace { struct module *mod; }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS +#define ftrace_need_init_nop() (true) +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); +#define ftrace_init_nop ftrace_init_nop + struct ftrace_regs { struct pt_regs regs; }; diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 278bf8e52b6e8..98bd099c428ee 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -31,6 +31,16 @@ #define NUM_FTRACE_TRAMPS 2 static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS]; +static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr, int link) +{ + ppc_inst_t op; + + WARN_ON(!is_offset_in_branch_range(addr - ip)); + create_branch(&op, (u32 *)ip, addr, link ? 
BRANCH_SET_LINK : 0); + + return op; +} + static ppc_inst_t ftrace_call_replace(unsigned long ip, unsigned long addr, int link) { @@ -597,6 +607,67 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } #endif +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + unsigned long addr, ip = rec->ip; + ppc_inst_t old, new; + int ret = 0; + + /* Verify instructions surrounding the ftrace location */ + if (IS_ENABLED(CONFIG_PPC32)) { + /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */ + ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); + if (!ret) + ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4))); + } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { + /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ + ret = ftrace_read_inst(ip - 4, &old); + if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) { + ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); + ret |= ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16))); + } + } else { + return -EINVAL; + } + + if (ret) + return ret; + + if (!core_kernel_text(ip)) { + if (!mod) { + pr_err("0x%lx: No module provided for non-kernel address\n", ip); + return -EFAULT; + } + rec->arch.mod = mod; + } + + /* Nop-out the ftrace location */ + new = ppc_inst(PPC_RAW_NOP()); + addr = MCOUNT_ADDR; + if (is_offset_in_branch_range(addr - ip)) { + /* Within range */ + old = ftrace_create_branch_inst(ip, addr, 1); + ret = ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip) || (IS_ENABLED(CONFIG_MODULES) && mod)) { + /* + * We would be branching to a linker-generated stub, or to the module _mcount + * stub. Let's just confirm we have a 'bl' here. 
+ */ + ret = ftrace_read_inst(ip, &old); + if (ret) + return ret; + if (!is_bl_op(old)) { + pr_err("0x%lx: expected (bl) != found (%08lx)\n", ip, ppc_inst_as_ulong(old)); + return -EINVAL; + } + ret = patch_instruction((u32 *)ip, new); + } else { + return -EINVAL; + } + + return ret; +} + int ftrace_update_ftrace_func(ftrace_func_t func) { unsigned long ip = (unsigned long)(&ftrace_call); From 562bde0bfc968d212d10ba6bf921a0774feebbac Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:29 +0530 Subject: [PATCH 106/135] powerpc/ftrace: Simplify ftrace_make_nop() Now that we validate the ftrace location during initialization in ftrace_init_nop(), we can simplify ftrace_make_nop() to patch-in the nop without worrying about the instructions surrounding the ftrace location. Note that we continue to ensure that we have a bl to ftrace_[regs_]caller at the ftrace location before nop-ing it out. Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/e12ccbf28c50c3a07fb614f4d392e55f7098a729.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace.c | 220 +++++------------------------ 1 file changed, 32 insertions(+), 188 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 98bd099c428ee..05153a1038fdf 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -116,112 +116,6 @@ static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) return ip + (long)offset; } -#ifdef CONFIG_MODULES -static int -__ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long entry, ptr, tramp; - unsigned long ip = rec->ip; - ppc_inst_t op, pop; - - /* read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { - pr_err("Fetching opcode failed.\n"); - return -EFAULT; - } - - /* Make sure that this is still a 24bit jump */ - if (!is_bl_op(op)) { - 
pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); - return -EINVAL; - } - - /* lets find where the pointer goes */ - tramp = find_bl_target(ip, op); - - pr_devel("ip:%lx jumps to %lx", ip, tramp); - - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; - } - - pr_devel("trampoline target %lx", ptr); - - entry = ppc_global_function_entry((void *)addr); - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; - } - - if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { - if (copy_inst_from_kernel_nofault(&op, (void *)(ip - 4))) { - pr_err("Fetching instruction at %lx failed.\n", ip - 4); - return -EFAULT; - } - - /* We expect either a mflr r0, or a std r0, LRSAVE(r1) */ - if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_MFLR(_R0))) && - !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) { - pr_err("Unexpected instruction %08lx around bl _mcount\n", - ppc_inst_as_ulong(op)); - return -EINVAL; - } - } else if (IS_ENABLED(CONFIG_PPC64)) { - /* - * Check what is in the next instruction. We can see ld r2,40(r1), but - * on first pass after boot we will see mflr r0. - */ - if (copy_inst_from_kernel_nofault(&op, (void *)(ip + 4))) { - pr_err("Fetching op failed.\n"); - return -EFAULT; - } - - if (!ppc_inst_equal(op, ppc_inst(PPC_INST_LD_TOC))) { - pr_err("Expected %08lx found %08lx\n", PPC_INST_LD_TOC, - ppc_inst_as_ulong(op)); - return -EINVAL; - } - } - - /* - * When using -mprofile-kernel or PPC32 there is no load to jump over. - * - * Otherwise our original call site looks like: - * - * bl - * ld r2,XX(r1) - * - * Milton Miller pointed out that we can not simply nop the branch. - * If a task was preempted when calling a trace function, the nops - * will remove the way to restore the TOC in r2 and the r2 TOC will - * get corrupted. - * - * Use a b +8 to jump over the load. 
- * XXX: could make PCREL depend on MPROFILE_KERNEL - * XXX: check PCREL && MPROFILE_KERNEL calling sequence - */ - if (IS_ENABLED(CONFIG_MPROFILE_KERNEL) || IS_ENABLED(CONFIG_PPC32)) - pop = ppc_inst(PPC_RAW_NOP()); - else - pop = ppc_inst(PPC_RAW_BRANCH(8)); /* b +8 */ - - if (patch_instruction((u32 *)ip, pop)) { - pr_err("Patching NOP failed.\n"); - return -EPERM; - } - - return 0; -} -#else -static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) -{ - return 0; -} -#endif /* CONFIG_MODULES */ - static unsigned long find_ftrace_tramp(unsigned long ip) { int i; @@ -235,88 +129,6 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } -static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long tramp, ip = rec->ip; - ppc_inst_t op; - - /* Read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { - pr_err("Fetching opcode failed.\n"); - return -EFAULT; - } - - /* Make sure that this is still a 24bit jump */ - if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); - return -EINVAL; - } - - /* Let's find where the pointer goes */ - tramp = find_bl_target(ip, op); - - pr_devel("ip:%lx jumps to %lx", ip, tramp); - - /* Are ftrace trampolines reachable? */ - if (!find_ftrace_tramp(ip)) { - pr_err("No ftrace trampolines reachable from %ps\n", (void *)ip); - return -EINVAL; - } - - if (patch_instruction((u32 *)ip, ppc_inst(PPC_RAW_NOP()))) { - pr_err("Patching NOP failed.\n"); - return -EPERM; - } - - return 0; -} - -int ftrace_make_nop(struct module *mod, - struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long ip = rec->ip; - ppc_inst_t old, new; - - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. 
- */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - old = ftrace_call_replace(ip, addr, 1); - new = ppc_inst(PPC_RAW_NOP()); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { - return __ftrace_make_nop_kernel(rec, addr); - } else if (!IS_ENABLED(CONFIG_MODULES)) { - return -EINVAL; - } - - /* - * Out of range jumps are called from modules. - * We should either already have a pointer to the module - * or it has been passed in. - */ - if (!rec->arch.mod) { - if (!mod) { - pr_err("No module loaded addr=%lx\n", addr); - return -EFAULT; - } - rec->arch.mod = mod; - } else if (mod) { - if (mod != rec->arch.mod) { - pr_err("Record mod %p not equal to passed in mod %p\n", - rec->arch.mod, mod); - return -EINVAL; - } - /* nothing to do if mod == rec->arch.mod */ - } else - mod = rec->arch.mod; - - return __ftrace_make_nop(mod, rec, addr); -} - #ifdef CONFIG_MODULES /* * Examine the existing instructions for __ftrace_make_call. @@ -607,6 +419,38 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } #endif +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long tramp, ip = rec->ip; + ppc_inst_t old, new; + + /* Nop-out the ftrace location */ + new = ppc_inst(PPC_RAW_NOP()); + if (is_offset_in_branch_range(addr - ip)) { + /* Within range */ + old = ftrace_create_branch_inst(ip, addr, 1); + return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + /* We would be branching to one of our ftrace tramps */ + tramp = find_ftrace_tramp(ip); + if (!tramp) { + pr_err("0x%lx: No ftrace trampolines reachable\n", ip); + return -EINVAL; + } + old = ftrace_create_branch_inst(ip, tramp, 1); + return ftrace_modify_code(ip, old, new); + } else if (IS_ENABLED(CONFIG_MODULES)) { + /* Module code would be going to one of the module stubs */ + if (!mod) + mod = rec->arch.mod; + tramp = (addr == (unsigned long)ftrace_caller ? 
mod->arch.tramp : mod->arch.tramp_regs); + old = ftrace_create_branch_inst(ip, tramp, 1); + return ftrace_modify_code(ip, old, new); + } + + return -EINVAL; +} + int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) { unsigned long addr, ip = rec->ip; From 9365e23b15f28b7b3b333a7fc6f4c8e9464ca99f Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:30 +0530 Subject: [PATCH 107/135] powerpc/ftrace: Simplify ftrace_make_call() Now that we validate the ftrace location during initialization in ftrace_init_nop(), we can simplify ftrace_make_call() to replace the nop without worrying about the instructions surrounding the ftrace location. Note that we continue to ensure that we have a nop at the ftrace location before patching it. Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/2d28866d2f556488a663981abe5621511efb207b.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace.c | 187 +++++------------------------ 1 file changed, 31 insertions(+), 156 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 05153a1038fdf..6ea8b90246a54 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -129,162 +129,6 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } -#ifdef CONFIG_MODULES -/* - * Examine the existing instructions for __ftrace_make_call. - * They should effectively be a NOP, and follow formal constraints, - * depending on the ABI. Return false if they don't. 
- */ -static bool expected_nop_sequence(void *ip, ppc_inst_t op0, ppc_inst_t op1) -{ - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - return ppc_inst_equal(op0, ppc_inst(PPC_RAW_NOP())); - else - return ppc_inst_equal(op0, ppc_inst(PPC_RAW_BRANCH(8))) && - ppc_inst_equal(op1, ppc_inst(PPC_INST_LD_TOC)); -} - -static int -__ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) -{ - ppc_inst_t op[2]; - void *ip = (void *)rec->ip; - unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; - - /* read where this goes */ - if (copy_inst_from_kernel_nofault(op, ip)) - return -EFAULT; - - if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && - copy_inst_from_kernel_nofault(op + 1, ip + 4)) - return -EFAULT; - - if (!expected_nop_sequence(ip, op[0], op[1])) { - pr_err("Unexpected call sequence at %p: %08lx %08lx\n", ip, - ppc_inst_as_ulong(op[0]), ppc_inst_as_ulong(op[1])); - return -EINVAL; - } - - /* If we never set up ftrace trampoline(s), then bail */ - if (!mod->arch.tramp || - (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !mod->arch.tramp_regs)) { - pr_err("No ftrace trampoline\n"); - return -EINVAL; - } - - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && rec->flags & FTRACE_FL_REGS) - tramp = mod->arch.tramp_regs; - else - tramp = mod->arch.tramp; - - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; - } - - pr_devel("trampoline target %lx", ptr); - - entry = ppc_global_function_entry((void *)addr); - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; - } - - if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { - pr_err("REL24 out of range!\n"); - return -EINVAL; - } - - return 0; -} -#else -static int __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) -{ - return 0; -} -#endif /* CONFIG_MODULES */ - -static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long 
addr) -{ - ppc_inst_t op; - void *ip = (void *)rec->ip; - unsigned long tramp, entry, ptr; - - /* Make sure we're being asked to patch branch to a known ftrace addr */ - entry = ppc_global_function_entry((void *)ftrace_caller); - ptr = ppc_global_function_entry((void *)addr); - - if (ptr != entry && IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) - entry = ppc_global_function_entry((void *)ftrace_regs_caller); - - if (ptr != entry) { - pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr); - return -EINVAL; - } - - /* Make sure we have a nop */ - if (copy_inst_from_kernel_nofault(&op, ip)) { - pr_err("Unable to read ftrace location %p\n", ip); - return -EFAULT; - } - - if (!ppc_inst_equal(op, ppc_inst(PPC_RAW_NOP()))) { - pr_err("Unexpected call sequence at %p: %08lx\n", - ip, ppc_inst_as_ulong(op)); - return -EINVAL; - } - - tramp = find_ftrace_tramp((unsigned long)ip); - if (!tramp) { - pr_err("No ftrace trampolines reachable from %ps\n", ip); - return -EINVAL; - } - - if (patch_branch(ip, tramp, BRANCH_SET_LINK)) { - pr_err("Error patching branch to ftrace tramp!\n"); - return -EINVAL; - } - - return 0; -} - -int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) -{ - unsigned long ip = rec->ip; - ppc_inst_t old, new; - - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. - */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - old = ppc_inst(PPC_RAW_NOP()); - new = ftrace_call_replace(ip, addr, 1); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { - return __ftrace_make_call_kernel(rec, addr); - } else if (!IS_ENABLED(CONFIG_MODULES)) { - /* We should not get here without modules */ - return -EINVAL; - } - - /* - * Out of range jumps are called from modules. - * Being that we are converting from nop, it had better - * already have a module defined. 
- */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - - return __ftrace_make_call(rec, addr); -} - #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS #ifdef CONFIG_MODULES static int @@ -419,6 +263,37 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, } #endif +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long tramp, ip = rec->ip; + ppc_inst_t old, new; + struct module *mod; + + old = ppc_inst(PPC_RAW_NOP()); + if (is_offset_in_branch_range(addr - ip)) { + /* Within range */ + new = ftrace_create_branch_inst(ip, addr, 1); + return ftrace_modify_code(ip, old, new); + } else if (core_kernel_text(ip)) { + /* We would be branching to one of our ftrace tramps */ + tramp = find_ftrace_tramp(ip); + if (!tramp) { + pr_err("0x%lx: No ftrace trampolines reachable\n", ip); + return -EINVAL; + } + new = ftrace_create_branch_inst(ip, tramp, 1); + return ftrace_modify_code(ip, old, new); + } else if (IS_ENABLED(CONFIG_MODULES)) { + /* Module code would be going to one of the module stubs */ + mod = rec->arch.mod; + tramp = (addr == (unsigned long)ftrace_caller ? mod->arch.tramp : mod->arch.tramp_regs); + new = ftrace_create_branch_inst(ip, tramp, 1); + return ftrace_modify_code(ip, old, new); + } + + return -EINVAL; +} + int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { unsigned long tramp, ip = rec->ip; From 67385738e3c248673668663ffb434ae4e0abf7f1 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:31 +0530 Subject: [PATCH 108/135] powerpc/ftrace: Simplify ftrace_modify_call() Now that we validate the ftrace location during initialization in ftrace_init_nop(), we can simplify ftrace_modify_call() to patch-in the updated branch instruction without worrying about the instructions surrounding the ftrace location. 
Note that we continue to ensure we have the expected branch instruction at the ftrace location before patching it with the updated branch destination. Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/06275720939f8ee4c2f61c9e9a3e89b1fa3c441d.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace.c | 161 ++++------------------------- 1 file changed, 21 insertions(+), 140 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 6ea8b90246a54..c37e22c6c2652 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -89,33 +89,11 @@ static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_ return ret; } -/* - * Helper functions that are the same for both PPC64 and PPC32. - */ -static int test_24bit_addr(unsigned long ip, unsigned long addr) -{ - addr = ppc_function_entry((void *)addr); - - return is_offset_in_branch_range(addr - ip); -} - static int is_bl_op(ppc_inst_t op) { return (ppc_inst_val(op) & ~PPC_LI_MASK) == PPC_RAW_BL(0); } -static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) -{ - int offset; - - offset = PPC_LI(ppc_inst_val(op)); - /* make it signed */ - if (offset & 0x02000000) - offset |= 0xfe000000; - - return ip + (long)offset; -} - static unsigned long find_ftrace_tramp(unsigned long ip) { int i; @@ -130,115 +108,16 @@ static unsigned long find_ftrace_tramp(unsigned long ip) } #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -#ifdef CONFIG_MODULES -static int -__ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, - unsigned long addr) +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { - ppc_inst_t op; - unsigned long ip = rec->ip; - unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; - - /* If we never set up ftrace trampolines, then bail */ - if (!mod->arch.tramp || !mod->arch.tramp_regs) 
{ - pr_err("No ftrace trampoline\n"); - return -EINVAL; - } - - /* read where this goes */ - if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { - pr_err("Fetching opcode failed.\n"); - return -EFAULT; - } - - /* Make sure that this is still a 24bit jump */ - if (!is_bl_op(op)) { - pr_err("Not expected bl: opcode is %08lx\n", ppc_inst_as_ulong(op)); - return -EINVAL; - } - - /* lets find where the pointer goes */ - tramp = find_bl_target(ip, op); - entry = ppc_global_function_entry((void *)old_addr); - - pr_devel("ip:%lx jumps to %lx", ip, tramp); - - if (tramp != entry) { - /* old_addr is not within range, so we must have used a trampoline */ - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; - } - - pr_devel("trampoline target %lx", ptr); - - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; - } - } - - /* The new target may be within range */ - if (test_24bit_addr(ip, addr)) { - /* within range */ - if (patch_branch((u32 *)ip, addr, BRANCH_SET_LINK)) { - pr_err("REL24 out of range!\n"); - return -EINVAL; - } - - return 0; - } - - if (rec->flags & FTRACE_FL_REGS) - tramp = mod->arch.tramp_regs; - else - tramp = mod->arch.tramp; - - if (module_trampoline_target(mod, tramp, &ptr)) { - pr_err("Failed to get trampoline target\n"); - return -EFAULT; - } - - pr_devel("trampoline target %lx", ptr); - - entry = ppc_global_function_entry((void *)addr); - /* This should match what was called */ - if (ptr != entry) { - pr_err("addr %lx does not match expected %lx\n", ptr, entry); - return -EINVAL; - } - - if (patch_branch((u32 *)ip, tramp, BRANCH_SET_LINK)) { - pr_err("REL24 out of range!\n"); - return -EINVAL; - } - - return 0; -} -#else -static int __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) -{ - return 0; -} -#endif - -int ftrace_modify_call(struct dyn_ftrace *rec, 
unsigned long old_addr, - unsigned long addr) -{ - unsigned long ip = rec->ip; + unsigned long tramp, tramp_old, ip = rec->ip; ppc_inst_t old, new; + struct module *mod; - /* - * If the calling address is more that 24 bits away, - * then we had to use a trampoline to make the call. - * Otherwise just update the call site. - */ - if (test_24bit_addr(ip, addr) && test_24bit_addr(ip, old_addr)) { - /* within range */ - old = ftrace_call_replace(ip, old_addr, 1); - new = ftrace_call_replace(ip, addr, 1); + if (is_offset_in_branch_range(old_addr - ip) && is_offset_in_branch_range(addr - ip)) { + /* Within range */ + old = ftrace_create_branch_inst(ip, old_addr, 1); + new = ftrace_create_branch_inst(ip, addr, 1); return ftrace_modify_code(ip, old, new); } else if (core_kernel_text(ip)) { /* @@ -246,20 +125,22 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, * variant, so there is nothing to do here */ return 0; - } else if (!IS_ENABLED(CONFIG_MODULES)) { - /* We should not get here without modules */ - return -EINVAL; - } - - /* - * Out of range jumps are called from modules. 
- */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; + } else if (IS_ENABLED(CONFIG_MODULES)) { + /* Module code would be going to one of the module stubs */ + mod = rec->arch.mod; + if (addr == (unsigned long)ftrace_caller) { + tramp_old = mod->arch.tramp_regs; + tramp = mod->arch.tramp; + } else { + tramp_old = mod->arch.tramp; + tramp = mod->arch.tramp_regs; + } + old = ftrace_create_branch_inst(ip, tramp_old, 1); + new = ftrace_create_branch_inst(ip, tramp, 1); + return ftrace_modify_code(ip, old, new); } - return __ftrace_modify_call(rec, old_addr, addr); + return -EINVAL; } #endif From a26ce4272eea2b20d4f39b9d7e56daf0c77151d8 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:32 +0530 Subject: [PATCH 109/135] powerpc/ftrace: Replace use of ftrace_call_replace() with ftrace_create_branch_inst() ftrace_create_branch_inst() is clearer about its intent than ftrace_call_replace(). Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/953513b88fa922ba7a66d772dc1310710efe9177.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace.c | 17 ++--------------- 1 file changed, 2 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index c37e22c6c2652..422dd760fbe01 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -41,19 +41,6 @@ static ppc_inst_t ftrace_create_branch_inst(unsigned long ip, unsigned long addr return op; } -static ppc_inst_t -ftrace_call_replace(unsigned long ip, unsigned long addr, int link) -{ - ppc_inst_t op; - - addr = ppc_function_entry((void *)addr); - - /* if (link) set op to 'bl' else 'b' */ - create_branch(&op, (u32 *)ip, addr, link ? 
BRANCH_SET_LINK : 0); - - return op; -} - static inline int ftrace_read_inst(unsigned long ip, ppc_inst_t *op) { if (copy_inst_from_kernel_nofault(op, (void *)ip)) { @@ -275,14 +262,14 @@ int ftrace_update_ftrace_func(ftrace_func_t func) int ret; old = ppc_inst_read((u32 *)&ftrace_call); - new = ftrace_call_replace(ip, (unsigned long)func, 1); + new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1); ret = ftrace_modify_code(ip, old, new); /* Also update the regs callback function */ if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS) && !ret) { ip = (unsigned long)(&ftrace_regs_call); old = ppc_inst_read((u32 *)&ftrace_regs_call); - new = ftrace_call_replace(ip, (unsigned long)func, 1); + new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1); ret = ftrace_modify_code(ip, old, new); } From c91c5a828685563c24ab8879d8386de356d9085a Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:33 +0530 Subject: [PATCH 110/135] powerpc/ftrace: Implement ftrace_replace_code() Implement ftrace_replace_code() to consolidate logic from the different ftrace patching routines: ftrace_make_nop(), ftrace_make_call() and ftrace_modify_call(). Note that ftrace_make_call() is still required primarily to handle patching modules during their load time. The other two routines should no longer be called. This lays the groundwork to enable better control in patching ftrace locations, including the ability to nop-out preceding profiling instructions when ftrace is disabled. 
Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/c28f852225646b0561bbf3c1d22d03f041ace8e0.1687166935.git.naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace.c | 165 ++++++++++++++++------------- 1 file changed, 92 insertions(+), 73 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 422dd760fbe01..cf9dce7752792 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -94,104 +94,123 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS -int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) +static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst) { - unsigned long tramp, tramp_old, ip = rec->ip; - ppc_inst_t old, new; - struct module *mod; + unsigned long ip = rec->ip; + unsigned long stub; - if (is_offset_in_branch_range(old_addr - ip) && is_offset_in_branch_range(addr - ip)) { + if (is_offset_in_branch_range(addr - ip)) { /* Within range */ - old = ftrace_create_branch_inst(ip, old_addr, 1); - new = ftrace_create_branch_inst(ip, addr, 1); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { - /* - * We always patch out of range locations to go to the regs - * variant, so there is nothing to do here - */ - return 0; - } else if (IS_ENABLED(CONFIG_MODULES)) { + stub = addr; +#ifdef CONFIG_MODULES + } else if (rec->arch.mod) { /* Module code would be going to one of the module stubs */ - mod = rec->arch.mod; - if (addr == (unsigned long)ftrace_caller) { - tramp_old = mod->arch.tramp_regs; - tramp = mod->arch.tramp; - } else { - tramp_old = mod->arch.tramp; - tramp = mod->arch.tramp_regs; + stub = (addr == (unsigned long)ftrace_caller ? 
rec->arch.mod->arch.tramp : + rec->arch.mod->arch.tramp_regs); +#endif + } else if (core_kernel_text(ip)) { + /* We would be branching to one of our ftrace stubs */ + stub = find_ftrace_tramp(ip); + if (!stub) { + pr_err("0x%lx: No ftrace stubs reachable\n", ip); + return -EINVAL; } - old = ftrace_create_branch_inst(ip, tramp_old, 1); - new = ftrace_create_branch_inst(ip, tramp, 1); - return ftrace_modify_code(ip, old, new); + } else { + return -EINVAL; } + *call_inst = ftrace_create_branch_inst(ip, stub, 1); + return 0; +} + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) +{ + /* This should never be called since we override ftrace_replace_code() */ + WARN_ON(1); return -EINVAL; } #endif int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - unsigned long tramp, ip = rec->ip; ppc_inst_t old, new; - struct module *mod; + int ret; + + /* This can only ever be called during module load */ + if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(rec->ip))) + return -EINVAL; old = ppc_inst(PPC_RAW_NOP()); - if (is_offset_in_branch_range(addr - ip)) { - /* Within range */ - new = ftrace_create_branch_inst(ip, addr, 1); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { - /* We would be branching to one of our ftrace tramps */ - tramp = find_ftrace_tramp(ip); - if (!tramp) { - pr_err("0x%lx: No ftrace trampolines reachable\n", ip); - return -EINVAL; - } - new = ftrace_create_branch_inst(ip, tramp, 1); - return ftrace_modify_code(ip, old, new); - } else if (IS_ENABLED(CONFIG_MODULES)) { - /* Module code would be going to one of the module stubs */ - mod = rec->arch.mod; - tramp = (addr == (unsigned long)ftrace_caller ? 
mod->arch.tramp : mod->arch.tramp_regs); - new = ftrace_create_branch_inst(ip, tramp, 1); - return ftrace_modify_code(ip, old, new); - } + ret = ftrace_get_call_inst(rec, addr, &new); + if (ret) + return ret; - return -EINVAL; + return ftrace_modify_code(rec->ip, old, new); } int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - unsigned long tramp, ip = rec->ip; - ppc_inst_t old, new; + /* + * This should never be called since we override ftrace_replace_code(), + * as well as ftrace_init_nop() + */ + WARN_ON(1); + return -EINVAL; +} - /* Nop-out the ftrace location */ - new = ppc_inst(PPC_RAW_NOP()); - if (is_offset_in_branch_range(addr - ip)) { - /* Within range */ - old = ftrace_create_branch_inst(ip, addr, 1); - return ftrace_modify_code(ip, old, new); - } else if (core_kernel_text(ip)) { - /* We would be branching to one of our ftrace tramps */ - tramp = find_ftrace_tramp(ip); - if (!tramp) { - pr_err("0x%lx: No ftrace trampolines reachable\n", ip); - return -EINVAL; +void ftrace_replace_code(int enable) +{ + ppc_inst_t old, new, call_inst, new_call_inst; + ppc_inst_t nop_inst = ppc_inst(PPC_RAW_NOP()); + unsigned long ip, new_addr, addr; + struct ftrace_rec_iter *iter; + struct dyn_ftrace *rec; + int ret = 0, update; + + for_ftrace_rec_iter(iter) { + rec = ftrace_rec_iter_record(iter); + ip = rec->ip; + + if (rec->flags & FTRACE_FL_DISABLED && !(rec->flags & FTRACE_FL_ENABLED)) + continue; + + addr = ftrace_get_addr_curr(rec); + new_addr = ftrace_get_addr_new(rec); + update = ftrace_update_record(rec, enable); + + switch (update) { + case FTRACE_UPDATE_IGNORE: + default: + continue; + case FTRACE_UPDATE_MODIFY_CALL: + ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst); + ret |= ftrace_get_call_inst(rec, addr, &call_inst); + old = call_inst; + new = new_call_inst; + break; + case FTRACE_UPDATE_MAKE_NOP: + ret = ftrace_get_call_inst(rec, addr, &call_inst); + old = call_inst; + new = nop_inst; + break; + case 
FTRACE_UPDATE_MAKE_CALL: + ret = ftrace_get_call_inst(rec, new_addr, &call_inst); + old = nop_inst; + new = call_inst; + break; } - old = ftrace_create_branch_inst(ip, tramp, 1); - return ftrace_modify_code(ip, old, new); - } else if (IS_ENABLED(CONFIG_MODULES)) { - /* Module code would be going to one of the module stubs */ - if (!mod) - mod = rec->arch.mod; - tramp = (addr == (unsigned long)ftrace_caller ? mod->arch.tramp : mod->arch.tramp_regs); - old = ftrace_create_branch_inst(ip, tramp, 1); - return ftrace_modify_code(ip, old, new); + + if (!ret) + ret = ftrace_modify_code(ip, old, new); + if (ret) + goto out; } - return -EINVAL; +out: + if (ret) + ftrace_bug(ret, rec); + return; } int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) From 0f71dcfb4aef6043da6cc509e7a7f6a3ae87c12d Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Mon, 19 Jun 2023 15:17:34 +0530 Subject: [PATCH 111/135] powerpc/ftrace: Add support for -fpatchable-function-entry GCC v13.1 updated support for -fpatchable-function-entry on ppc64le to emit nops after the local entry point, rather than before it. This allows us to use this in the kernel for ftrace purposes. A new script is added under arch/powerpc/tools/ to help detect if nops are emitted after the function local entry point, or before the global entry point. With -fpatchable-function-entry, we no longer have the profiling instructions generated at function entry, so we only need to validate the presence of two nops at the ftrace location in ftrace_init_nop(). We patch the preceding instruction with 'mflr r0' to match the -mprofile-kernel ABI for subsequent ftrace use. This changes the profiling instructions used on ppc32. The default -pg option emits an additional 'stw' instruction after 'mflr r0' and before the branch to _mcount 'bl _mcount'. 
This is very similar to the original -mprofile-kernel implementation on ppc64le, where an additional 'std' instruction was used to save LR to its save location in the caller's stackframe. Subsequently, this additional store was removed in later compiler versions for performance reasons. The same reasons apply for ppc32 so we only patch in a 'mflr r0'. Signed-off-by: Naveen N Rao Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/68586d22981a2c3bb45f27a2b621173d10a7d092.1687166935.git.naveen@kernel.org --- arch/powerpc/Kconfig | 14 +++++++--- arch/powerpc/Makefile | 5 ++++ arch/powerpc/include/asm/ftrace.h | 6 +++-- arch/powerpc/include/asm/vermagic.h | 4 ++- arch/powerpc/kernel/module_64.c | 2 +- arch/powerpc/kernel/trace/ftrace.c | 14 ++++++++-- arch/powerpc/kernel/trace/ftrace_entry.S | 2 ++ .../gcc-check-fpatchable-function-entry.sh | 26 +++++++++++++++++++ 8 files changed, 64 insertions(+), 9 deletions(-) create mode 100755 arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index cb0a89946c4e6..c831e20cf40fa 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -186,6 +186,7 @@ config PPC select DYNAMIC_FTRACE if FUNCTION_TRACER select EDAC_ATOMIC_SCRUB select EDAC_SUPPORT + select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if ARCH_USING_PATCHABLE_FUNCTION_ENTRY select GENERIC_ATOMIC64 if PPC32 select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_CMOS_UPDATE @@ -227,8 +228,8 @@ config PPC select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_ARGS if MPROFILE_KERNEL || PPC32 - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL || PPC32 + select HAVE_DYNAMIC_FTRACE_WITH_ARGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 + select HAVE_DYNAMIC_FTRACE_WITH_REGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select 
HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP @@ -256,7 +257,7 @@ config PPC select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S) select HAVE_OPTPROBES - select HAVE_OBJTOOL if PPC32 || MPROFILE_KERNEL + select HAVE_OBJTOOL if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_OBJTOOL_MCOUNT if HAVE_OBJTOOL select HAVE_PERF_EVENTS select HAVE_PERF_EVENTS_NMI if PPC64 @@ -554,6 +555,13 @@ config MPROFILE_KERNEL def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mlittle-endian) if CPU_LITTLE_ENDIAN def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-mprofile-kernel.sh $(CC) -mbig-endian) if CPU_BIG_ENDIAN +config ARCH_USING_PATCHABLE_FUNCTION_ENTRY + depends on FUNCTION_TRACER && (PPC32 || PPC64_ELF_ABI_V2) + depends on $(cc-option,-fpatchable-function-entry=2) + def_bool y if PPC32 + def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN + def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN + config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index dac7ca153886b..cbd34459cce08 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -143,11 +143,16 @@ CFLAGS-$(CONFIG_PPC32) += $(call cc-option, $(MULTIPLEWORD)) CFLAGS-$(CONFIG_PPC32) += $(call cc-option,-mno-readonly-in-sdata) ifdef CONFIG_FUNCTION_TRACER +ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY +KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY +CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +else CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL CC_FLAGS_FTRACE += -mprofile-kernel endif endif +endif CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU) AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += 
-mcpu=$(CONFIG_TARGET_CPU) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index ef9f0b97670d1..9e5a39b6a3114 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -11,7 +11,7 @@ #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR /* Ignore unused weak functions which will have larger offsets */ -#ifdef CONFIG_MPROFILE_KERNEL +#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) #define FTRACE_MCOUNT_MAX_OFFSET 16 #elif defined(CONFIG_PPC32) #define FTRACE_MCOUNT_MAX_OFFSET 8 @@ -22,7 +22,9 @@ extern void _mcount(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { - /* relocation of mcount call site is the same as the address */ + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) + addr += MCOUNT_INSN_SIZE; + return addr; } diff --git a/arch/powerpc/include/asm/vermagic.h b/arch/powerpc/include/asm/vermagic.h index b054a8576e5de..6f250fe506bd1 100644 --- a/arch/powerpc/include/asm/vermagic.h +++ b/arch/powerpc/include/asm/vermagic.h @@ -2,7 +2,9 @@ #ifndef _ASM_VERMAGIC_H #define _ASM_VERMAGIC_H -#ifdef CONFIG_MPROFILE_KERNEL +#ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY +#define MODULE_ARCH_VERMAGIC_FTRACE "patchable-function-entry " +#elif defined(CONFIG_MPROFILE_KERNEL) #define MODULE_ARCH_VERMAGIC_FTRACE "mprofile-kernel " #else #define MODULE_ARCH_VERMAGIC_FTRACE "" diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 92570289ce08f..7112adc597a80 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -465,7 +465,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, return 0; } -#ifdef CONFIG_MPROFILE_KERNEL +#if defined(CONFIG_MPROFILE_KERNEL) || defined(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) static u32 stub_insns[] = { #ifdef CONFIG_PPC_KERNEL_PCREL diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 
cf9dce7752792..82010629cf887 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -220,7 +220,12 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) int ret = 0; /* Verify instructions surrounding the ftrace location */ - if (IS_ENABLED(CONFIG_PPC32)) { + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { + /* Expect nops */ + ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); + if (!ret) + ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP())); + } else if (IS_ENABLED(CONFIG_PPC32)) { /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); if (!ret) @@ -250,7 +255,12 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) /* Nop-out the ftrace location */ new = ppc_inst(PPC_RAW_NOP()); addr = MCOUNT_ADDR; - if (is_offset_in_branch_range(addr - ip)) { + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { + /* we instead patch-in the 'mflr r0' */ + old = ppc_inst(PPC_RAW_NOP()); + new = ppc_inst(PPC_RAW_MFLR(_R0)); + ret = ftrace_modify_code(ip - 4, old, new); + } else if (is_offset_in_branch_range(addr - ip)) { /* Within range */ old = ftrace_create_branch_inst(ip, addr, 1); ret = ftrace_modify_code(ip, old, new); diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 4e7103c316fb0..0b3d77d65a796 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -250,6 +250,7 @@ livepatch_handler: blr #endif /* CONFIG_LIVEPATCH */ +#ifndef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY _GLOBAL(mcount) _GLOBAL(_mcount) EXPORT_SYMBOL(_mcount) @@ -257,6 +258,7 @@ EXPORT_SYMBOL(_mcount) mtctr r12 mtlr r0 bctr +#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER _GLOBAL(return_to_handler) diff --git a/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh b/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh new file mode 
100755 index 0000000000000..06706903503b6 --- /dev/null +++ b/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -o pipefail + +# To debug, uncomment the following line +# set -x + +# Output from -fpatchable-function-entry can only vary on ppc64 elfv2, so this +# should not be invoked for other targets. Therefore we can pass in -m64 and +# -mabi explicitly, to take care of toolchains defaulting to other targets. + +# Test whether the compile option -fpatchable-function-entry exists and +# generates appropriate code +echo "int func() { return 0; }" | \ + $* -m64 -mabi=elfv2 -S -x c -O2 -fpatchable-function-entry=2 - -o - 2> /dev/null | \ + grep -q "__patchable_function_entries" + +# Test whether nops are generated after the local entry point +echo "int x; int func() { return x; }" | \ + $* -m64 -mabi=elfv2 -S -x c -O2 -fpatchable-function-entry=2 - -o - 2> /dev/null | \ + awk 'BEGIN { RS = ";" } /\.localentry.*nop.*\n[[:space:]]*nop/ { print $0 }' | \ + grep -q "func:" + +exit 0 From 429356fac0440b962aaa6d3688709813a21dd122 Mon Sep 17 00:00:00 2001 From: Immad Mir Date: Sun, 28 May 2023 13:16:44 +0530 Subject: [PATCH 112/135] powerpc/powernv: fix debugfs_create_dir() error checking The debugfs_create_dir returns ERR_PTR in case of an error and the correct way of checking it is by using the IS_ERR inline function, and not the simple null comparison. This patch fixes this.
Suggested-by: Ivan Orlov Signed-off-by: Immad Mir Signed-off-by: Michael Ellerman Link: https://msgid.link/CY5PR12MB64553EE96EBB3927311DB598C6459@CY5PR12MB6455.namprd12.prod.outlook.com --- arch/powerpc/platforms/powernv/opal-xscom.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-xscom.c b/arch/powerpc/platforms/powernv/opal-xscom.c index 6b4eed2ef4fa9..262cd6fac9071 100644 --- a/arch/powerpc/platforms/powernv/opal-xscom.c +++ b/arch/powerpc/platforms/powernv/opal-xscom.c @@ -168,7 +168,7 @@ static int scom_debug_init_one(struct dentry *root, struct device_node *dn, ent->path.size = strlen((char *)ent->path.data); dir = debugfs_create_dir(ent->name, root); - if (!dir) { + if (IS_ERR(dir)) { kfree(ent->path.data); kfree(ent); return -1; @@ -190,7 +190,7 @@ static int scom_debug_init(void) return 0; root = debugfs_create_dir("scom", arch_debugfs_dir); - if (!root) + if (IS_ERR(root)) return -1; rc = 0; From 0e2a34c467a0de2b0309d033e2700ce608e3fbf4 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Aug 2023 08:07:50 +0200 Subject: [PATCH 113/135] powerpc/64e: Fix circular dependency with CONFIG_SMP disabled asm/percpu.h includes asm/paca.h which needs struct tlb_core_data which is defined in mmu-e500.h asm/percpu.h is included from asm/mmu.h in a #ifdef CONFIG_E500 before the inclusion of mmu-e500.h To fix that, move the inclusion of asm/percpu.h into mmu-e500.h after the definition of struct tlb_core_data Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308220708.nRf5AUAe-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202308220857.uFq2oAxM-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202308221055.lw3UzJIL-lkp@intel.com/ Fixes: 3a24ea0df83e ("powerpc/kuap: Use ASM feature fixups instead of static branches") Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: 
https://msgid.link/5e0f97d5cbcd05238b56b4424ab096468296824d.1692684461.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/mmu.h | 5 ----- arch/powerpc/include/asm/nohash/mmu-e500.h | 3 +++ 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h index 82af2e2c5eca7..52cc25864a1be 100644 --- a/arch/powerpc/include/asm/mmu.h +++ b/arch/powerpc/include/asm/mmu.h @@ -144,11 +144,6 @@ typedef pte_t *pgtable_t; -#ifdef CONFIG_PPC_E500 -#include <asm/percpu.h> -DECLARE_PER_CPU(int, next_tlbcam_idx); -#endif - enum { MMU_FTRS_POSSIBLE = #if defined(CONFIG_PPC_BOOK3S_604) diff --git a/arch/powerpc/include/asm/nohash/mmu-e500.h b/arch/powerpc/include/asm/nohash/mmu-e500.h index e43a418d3ccd0..6ddced0415cb5 100644 --- a/arch/powerpc/include/asm/nohash/mmu-e500.h +++ b/arch/powerpc/include/asm/nohash/mmu-e500.h @@ -319,6 +319,9 @@ extern int book3e_htw_mode; #endif +#include <asm/percpu.h> +DECLARE_PER_CPU(int, next_tlbcam_idx); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_POWERPC_MMU_BOOK3E_H_ */ From c265735ff5b1f13272e2bfb196f5c55f9b3c9bac Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Tue, 22 Aug 2023 08:13:13 +0200 Subject: [PATCH 114/135] powerpc/85xx: Mark some functions static and add missing includes to fix no previous prototype error corenet{32/64}_smp_defconfig leads to: CC arch/powerpc/sysdev/ehv_pic.o arch/powerpc/sysdev/ehv_pic.c:45:6: error: no previous prototype for 'ehv_pic_unmask_irq' [-Werror=missing-prototypes] 45 | void ehv_pic_unmask_irq(struct irq_data *d) | ^~~~~~~~~~~~~~~~~~ arch/powerpc/sysdev/ehv_pic.c:52:6: error: no previous prototype for 'ehv_pic_mask_irq' [-Werror=missing-prototypes] 52 | void ehv_pic_mask_irq(struct irq_data *d) | ^~~~~~~~~~~~~~~~ arch/powerpc/sysdev/ehv_pic.c:59:6: error: no previous prototype for 'ehv_pic_end_irq' [-Werror=missing-prototypes] 59 | void ehv_pic_end_irq(struct irq_data *d) | ^~~~~~~~~~~~~~~ arch/powerpc/sysdev/ehv_pic.c:66:6: error: no previous prototype for
'ehv_pic_direct_end_irq' [-Werror=missing-prototypes] 66 | void ehv_pic_direct_end_irq(struct irq_data *d) | ^~~~~~~~~~~~~~~~~~~~~~ arch/powerpc/sysdev/ehv_pic.c:71:5: error: no previous prototype for 'ehv_pic_set_affinity' [-Werror=missing-prototypes] 71 | int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest, | ^~~~~~~~~~~~~~~~~~~~ arch/powerpc/sysdev/ehv_pic.c:112:5: error: no previous prototype for 'ehv_pic_set_irq_type' [-Werror=missing-prototypes] 112 | int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type) | ^~~~~~~~~~~~~~~~~~~~ CC arch/powerpc/sysdev/fsl_rio.o arch/powerpc/sysdev/fsl_rio.c:102:5: error: no previous prototype for 'fsl_rio_mcheck_exception' [-Werror=missing-prototypes] 102 | int fsl_rio_mcheck_exception(struct pt_regs *regs) | ^~~~~~~~~~~~~~~~~~~~~~~~ arch/powerpc/sysdev/fsl_rio.c:306:5: error: no previous prototype for 'fsl_map_inb_mem' [-Werror=missing-prototypes] 306 | int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, | ^~~~~~~~~~~~~~~ arch/powerpc/sysdev/fsl_rio.c:357:6: error: no previous prototype for 'fsl_unmap_inb_mem' [-Werror=missing-prototypes] 357 | void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart) | ^~~~~~~~~~~~~~~~~ arch/powerpc/sysdev/fsl_rio.c:445:5: error: no previous prototype for 'fsl_rio_setup' [-Werror=missing-prototypes] 445 | int fsl_rio_setup(struct platform_device *dev) | ^~~~~~~~~~~~~ CC arch/powerpc/sysdev/fsl_rmu.o arch/powerpc/sysdev/fsl_rmu.c:362:6: error: no previous prototype for 'msg_unit_error_handler' [-Werror=missing-prototypes] 362 | void msg_unit_error_handler(void) | ^~~~~~~~~~~~~~~~~~~~~~ CC arch/powerpc/platforms/85xx/corenet_generic.o arch/powerpc/platforms/85xx/corenet_generic.c:33:13: error: no previous prototype for 'corenet_gen_pic_init' [-Werror=missing-prototypes] 33 | void __init corenet_gen_pic_init(void) | ^~~~~~~~~~~~~~~~~~~~ arch/powerpc/platforms/85xx/corenet_generic.c:51:13: error: no previous prototype for 
'corenet_gen_setup_arch' [-Werror=missing-prototypes] 51 | void __init corenet_gen_setup_arch(void) | ^~~~~~~~~~~~~~~~~~~~~~ arch/powerpc/platforms/85xx/corenet_generic.c:104:12: error: no previous prototype for 'corenet_gen_publish_devices' [-Werror=missing-prototypes] 104 | int __init corenet_gen_publish_devices(void) | ^~~~~~~~~~~~~~~~~~~~~~~~~~~ CC arch/powerpc/platforms/85xx/qemu_e500.o arch/powerpc/platforms/85xx/qemu_e500.c:28:13: error: no previous prototype for 'qemu_e500_pic_init' [-Werror=missing-prototypes] 28 | void __init qemu_e500_pic_init(void) | ^~~~~~~~~~~~~~~~~~ CC arch/powerpc/kernel/pmc.o arch/powerpc/kernel/pmc.c:78:6: error: no previous prototype for 'power4_enable_pmcs' [-Werror=missing-prototypes] 78 | void power4_enable_pmcs(void) | ^~~~~~~~~~~~~~~~~~ Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/c90780017b624b91771a3e4240dcbadc68137915.1692684784.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/pmc.c | 2 +- arch/powerpc/platforms/85xx/corenet_generic.c | 6 +++--- arch/powerpc/platforms/85xx/qemu_e500.c | 2 +- arch/powerpc/sysdev/ehv_pic.c | 12 ++++++------ arch/powerpc/sysdev/fsl_rio.c | 9 +++++---- arch/powerpc/sysdev/fsl_rmu.c | 2 +- 6 files changed, 17 insertions(+), 16 deletions(-) diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index 15414c8a28376..9fabb4d9235ef 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -74,7 +74,7 @@ void release_pmc_hardware(void) } EXPORT_SYMBOL_GPL(release_pmc_hardware); -#ifdef CONFIG_PPC64 +#ifdef CONFIG_PPC_BOOK3S_64 void power4_enable_pmcs(void) { unsigned long hid0; diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c index bfde391c42f43..645fcca77cde5 100644 --- a/arch/powerpc/platforms/85xx/corenet_generic.c +++ b/arch/powerpc/platforms/85xx/corenet_generic.c @@ -30,7 +30,7 @@ #include "smp.h" #include "mpc85xx.h" -void __init corenet_gen_pic_init(void) 
+static void __init corenet_gen_pic_init(void) { struct mpic *mpic; unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | @@ -48,7 +48,7 @@ void __init corenet_gen_pic_init(void) /* * Setup the architecture */ -void __init corenet_gen_setup_arch(void) +static void __init corenet_gen_setup_arch(void) { mpc85xx_smp_init(); @@ -101,7 +101,7 @@ static const struct of_device_id of_device_ids[] = { {} }; -int __init corenet_gen_publish_devices(void) +static int __init corenet_gen_publish_devices(void) { return of_platform_bus_probe(NULL, of_device_ids, NULL); } diff --git a/arch/powerpc/platforms/85xx/qemu_e500.c b/arch/powerpc/platforms/85xx/qemu_e500.c index 6e4b1ddf292b7..3cd2f3bd42233 100644 --- a/arch/powerpc/platforms/85xx/qemu_e500.c +++ b/arch/powerpc/platforms/85xx/qemu_e500.c @@ -25,7 +25,7 @@ #include "smp.h" #include "mpc85xx.h" -void __init qemu_e500_pic_init(void) +static void __init qemu_e500_pic_init(void) { struct mpic *mpic; unsigned int flags = MPIC_BIG_ENDIAN | MPIC_SINGLE_DEST_CPU | diff --git a/arch/powerpc/sysdev/ehv_pic.c b/arch/powerpc/sysdev/ehv_pic.c index c7327b836d2b5..040827671d213 100644 --- a/arch/powerpc/sysdev/ehv_pic.c +++ b/arch/powerpc/sysdev/ehv_pic.c @@ -42,33 +42,33 @@ static u32 __iomem *mpic_percpu_base_vaddr; * Linux descriptor level callbacks */ -void ehv_pic_unmask_irq(struct irq_data *d) +static void ehv_pic_unmask_irq(struct irq_data *d) { unsigned int src = virq_to_hw(d->irq); ev_int_set_mask(src, 0); } -void ehv_pic_mask_irq(struct irq_data *d) +static void ehv_pic_mask_irq(struct irq_data *d) { unsigned int src = virq_to_hw(d->irq); ev_int_set_mask(src, 1); } -void ehv_pic_end_irq(struct irq_data *d) +static void ehv_pic_end_irq(struct irq_data *d) { unsigned int src = virq_to_hw(d->irq); ev_int_eoi(src); } -void ehv_pic_direct_end_irq(struct irq_data *d) +static void ehv_pic_direct_end_irq(struct irq_data *d) { out_be32(mpic_percpu_base_vaddr + MPIC_EOI / 4, 0); } -int ehv_pic_set_affinity(struct irq_data *d, 
const struct cpumask *dest, +static int ehv_pic_set_affinity(struct irq_data *d, const struct cpumask *dest, bool force) { unsigned int src = virq_to_hw(d->irq); @@ -109,7 +109,7 @@ static unsigned int ehv_pic_type_to_vecpri(unsigned int type) } } -int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type) +static int ehv_pic_set_irq_type(struct irq_data *d, unsigned int flow_type) { unsigned int src = virq_to_hw(d->irq); unsigned int vecpri, vold, vnew, prio, cpu_dest; diff --git a/arch/powerpc/sysdev/fsl_rio.c b/arch/powerpc/sysdev/fsl_rio.c index efd8f6291ea62..f9b214b299e70 100644 --- a/arch/powerpc/sysdev/fsl_rio.c +++ b/arch/powerpc/sysdev/fsl_rio.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "fsl_rio.h" @@ -303,8 +304,8 @@ static void fsl_rio_inbound_mem_init(struct rio_priv *priv) out_be32(&priv->inb_atmu_regs[i].riwar, 0); } -int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, - u64 rstart, u64 size, u32 flags) +static int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, + u64 rstart, u64 size, u32 flags) { struct rio_priv *priv = mport->priv; u32 base_size; @@ -354,7 +355,7 @@ int fsl_map_inb_mem(struct rio_mport *mport, dma_addr_t lstart, return 0; } -void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart) +static void fsl_unmap_inb_mem(struct rio_mport *mport, dma_addr_t lstart) { u32 win_start_shift, base_start_shift; struct rio_priv *priv = mport->priv; @@ -442,7 +443,7 @@ static inline void fsl_rio_info(struct device *dev, u32 ccsr) * master port with system-specific info, and registers the * master port with the RapidIO subsystem. 
*/ -int fsl_rio_setup(struct platform_device *dev) +static int fsl_rio_setup(struct platform_device *dev) { struct rio_ops *ops; struct rio_mport *port; diff --git a/arch/powerpc/sysdev/fsl_rmu.c b/arch/powerpc/sysdev/fsl_rmu.c index 58221b6e1465a..f956591cb64e9 100644 --- a/arch/powerpc/sysdev/fsl_rmu.c +++ b/arch/powerpc/sysdev/fsl_rmu.c @@ -359,7 +359,7 @@ fsl_rio_dbell_handler(int irq, void *dev_instance) return IRQ_HANDLED; } -void msg_unit_error_handler(void) +static void msg_unit_error_handler(void) { /*XXX: Error recovery is not implemented, we just clear errors */ From feea65a338e52297b68ceb688eaf0ffc50310a83 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 22 Aug 2023 00:28:19 +1000 Subject: [PATCH 115/135] powerpc/powernv: Fix fortify source warnings in opal-prd.c As reported by Mahesh & Aneesh, opal_prd_msg_notifier() triggers a FORTIFY_SOURCE warning: memcpy: detected field-spanning write (size 32) of single field "&item->msg" at arch/powerpc/platforms/powernv/opal-prd.c:355 (size 4) WARNING: CPU: 9 PID: 660 at arch/powerpc/platforms/powernv/opal-prd.c:355 opal_prd_msg_notifier+0x174/0x188 [opal_prd] NIP opal_prd_msg_notifier+0x174/0x188 [opal_prd] LR opal_prd_msg_notifier+0x170/0x188 [opal_prd] Call Trace: opal_prd_msg_notifier+0x170/0x188 [opal_prd] (unreliable) notifier_call_chain+0xc0/0x1b0 atomic_notifier_call_chain+0x2c/0x40 opal_message_notify+0xf4/0x2c0 This happens because the copy is targeting item->msg, which is only 4 bytes in size, even though the enclosing item was allocated with extra space following the msg. To fix the warning define struct opal_prd_msg with a union of the header and a flex array, and have the memcpy target the flex array. 
Reported-by: "Aneesh Kumar K.V" Reported-by: Mahesh Salgaonkar Tested-by: Mahesh Salgaonkar Reviewed-by: Mahesh Salgaonkar Signed-off-by: Michael Ellerman Link: https://msgid.link/20230821142820.497107-1-mpe@ellerman.id.au --- arch/powerpc/platforms/powernv/opal-prd.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 113bdb151f687..40e26e9f318fd 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -24,13 +24,20 @@ #include +struct opal_prd_msg { + union { + struct opal_prd_msg_header header; + DECLARE_FLEX_ARRAY(u8, data); + }; +}; + /* * The msg member must be at the end of the struct, as it's followed by the * message data. */ struct opal_prd_msg_queue_item { - struct list_head list; - struct opal_prd_msg_header msg; + struct list_head list; + struct opal_prd_msg msg; }; static struct device_node *prd_node; @@ -156,7 +163,7 @@ static ssize_t opal_prd_read(struct file *file, char __user *buf, int rc; /* we need at least a header's worth of data */ - if (count < sizeof(item->msg)) + if (count < sizeof(item->msg.header)) return -EINVAL; if (*ppos) @@ -186,7 +193,7 @@ static ssize_t opal_prd_read(struct file *file, char __user *buf, return -EINTR; } - size = be16_to_cpu(item->msg.size); + size = be16_to_cpu(item->msg.header.size); if (size > count) { err = -EINVAL; goto err_requeue; @@ -352,7 +359,7 @@ static int opal_prd_msg_notifier(struct notifier_block *nb, if (!item) return -ENOMEM; - memcpy(&item->msg, msg->params, msg_size); + memcpy(&item->msg.data, msg->params, msg_size); spin_lock_irqsave(&opal_prd_msg_queue_lock, flags); list_add_tail(&item->list, &opal_prd_msg_queue); From 22b165617b779418166319a19fd926a9c6feb9a3 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 22 Aug 2023 00:28:20 +1000 Subject: [PATCH 116/135] powerpc/powernv: Use struct opal_prd_msg in more places 
Use the newly added struct opal_prd_msg in some other functions that operate on opal_prd messages, rather than using other types. Signed-off-by: Michael Ellerman Link: https://msgid.link/20230821142820.497107-2-mpe@ellerman.id.au --- arch/powerpc/platforms/powernv/opal-prd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-prd.c b/arch/powerpc/platforms/powernv/opal-prd.c index 40e26e9f318fd..327e2f76905d5 100644 --- a/arch/powerpc/platforms/powernv/opal-prd.c +++ b/arch/powerpc/platforms/powernv/opal-prd.c @@ -221,8 +221,8 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { struct opal_prd_msg_header hdr; + struct opal_prd_msg *msg; ssize_t size; - void *msg; int rc; size = sizeof(hdr); @@ -254,12 +254,12 @@ static ssize_t opal_prd_write(struct file *file, const char __user *buf, static int opal_prd_release(struct inode *inode, struct file *file) { - struct opal_prd_msg_header msg; + struct opal_prd_msg msg; - msg.size = cpu_to_be16(sizeof(msg)); - msg.type = OPAL_PRD_MSG_TYPE_FINI; + msg.header.size = cpu_to_be16(sizeof(msg)); + msg.header.type = OPAL_PRD_MSG_TYPE_FINI; - opal_prd_msg((struct opal_prd_msg *)&msg); + opal_prd_msg(&msg); atomic_xchg(&prd_usage, 0); From fabdb27da78afb93b0a83c0579025cb8d05c0d2d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Aug 2023 15:54:30 +1000 Subject: [PATCH 117/135] powerpc: Drop zalloc_maybe_bootmem() The only callers of zalloc_maybe_bootmem() are PCI setup routines. These used to be called early during boot before slab setup, and also during runtime due to hotplug. But commit 5537fcb319d0 ("powerpc/pci: Add ppc_md.discover_phbs()") moved the boot-time calls later, after slab setup, meaning there's no longer any need for zalloc_maybe_bootmem(), kzalloc() can be used in all cases. 
Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20230823055430.752550-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/setup.h | 1 - arch/powerpc/kernel/pci-common.c | 2 +- arch/powerpc/lib/Makefile | 2 +- arch/powerpc/lib/alloc.c | 23 ----------------------- arch/powerpc/sysdev/fsl_pci.c | 2 +- 5 files changed, 3 insertions(+), 27 deletions(-) delete mode 100644 arch/powerpc/lib/alloc.c diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index e29e83f8a89c8..eed74c1fb832f 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -8,7 +8,6 @@ extern void ppc_printk_progress(char *s, unsigned short hex); extern unsigned long long memory_limit; -extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask); struct device_node; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e88d7c9feeec3..040255ddb5697 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -125,7 +125,7 @@ struct pci_controller *pcibios_alloc_controller(struct device_node *dev) { struct pci_controller *phb; - phb = zalloc_maybe_bootmem(sizeof(struct pci_controller), GFP_KERNEL); + phb = kzalloc(sizeof(struct pci_controller), GFP_KERNEL); if (phb == NULL) return NULL; diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 9aa8286c96871..51ad0397c17ab 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -27,7 +27,7 @@ endif CFLAGS_code-patching.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_feature-fixups.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) -obj-y += alloc.o code-patching.o feature-fixups.o pmem.o +obj-y += code-patching.o feature-fixups.o pmem.o obj-$(CONFIG_CODE_PATCHING_SELFTEST) += test-code-patching.o diff --git a/arch/powerpc/lib/alloc.c b/arch/powerpc/lib/alloc.c deleted file mode 100644 index ce180870bd52f..0000000000000 --- a/arch/powerpc/lib/alloc.c +++ /dev/null @@ -1,23 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -#include -#include -#include -#include -#include -#include - - -void * __ref zalloc_maybe_bootmem(size_t size, gfp_t mask) -{ - void *p; - - if (slab_is_available()) - p = kzalloc(size, mask); - else { - p = memblock_alloc(size, SMP_CACHE_BYTES); - if (!p) - panic("%s: Failed to allocate %zu bytes\n", __func__, - size); - } - return p; -} diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c index 5f7219df35ef0..3868483fbe292 100644 --- a/arch/powerpc/sysdev/fsl_pci.c +++ b/arch/powerpc/sysdev/fsl_pci.c @@ -767,7 +767,7 @@ static int __init mpc83xx_pcie_setup(struct pci_controller *hose, u32 cfg_bar; int ret = -ENOMEM; - pcie = zalloc_maybe_bootmem(sizeof(*pcie), GFP_KERNEL); + pcie = kzalloc(sizeof(*pcie), GFP_KERNEL); if (!pcie) return ret; From fe32945203ffc8d6fed815f7ed7729219f8b0ab6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Aug 2023 14:48:03 +1000 Subject: [PATCH 118/135] cxl: Drop unused detach_spa() Clang warns: drivers/misc/cxl/native.c:272:20: error: unused function 'detach_spa' [-Werror,-Wunused-function] It was created as part of some refactoring in commit 05155772f642 ("cxl: Allocate and release the SPA with the AFU"), but has never been called in its current form. Drop it. 
Acked-by: Andrew Donnellan Signed-off-by: Michael Ellerman Link: https://msgid.link/20230823044803.737175-1-mpe@ellerman.id.au --- drivers/misc/cxl/native.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 50b0c44bb8d70..fbe16a6ab7adc 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -269,11 +269,6 @@ static void attach_spa(struct cxl_afu *afu) cxl_p1n_write(afu, CXL_PSL_SPAP_An, spap); } -static inline void detach_spa(struct cxl_afu *afu) -{ - cxl_p1n_write(afu, CXL_PSL_SPAP_An, 0); -} - void cxl_release_spa(struct cxl_afu *afu) { if (afu->native->spa) { From c040c7488b6a89c98dd0f6dd5f001101413779e2 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Aug 2023 15:53:14 +1000 Subject: [PATCH 119/135] powerpc/pseries: Move VPHN constants into vphn.h These don't have any particularly good reason to belong in lppaca.h, move them into their own header. Signed-off-by: Michael Ellerman Link: https://msgid.link/20230823055317.751786-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/lppaca.h | 22 ------------------- arch/powerpc/include/asm/vphn.h | 22 +++++++++++++++++++ arch/powerpc/mm/numa.c | 1 + arch/powerpc/platforms/pseries/lpar.c | 1 + arch/powerpc/platforms/pseries/vas.c | 1 + arch/powerpc/platforms/pseries/vphn.c | 2 +- .../selftests/powerpc/vphn/asm/lppaca.h | 1 - .../testing/selftests/powerpc/vphn/asm/vphn.h | 1 + 8 files changed, 27 insertions(+), 24 deletions(-) create mode 100644 arch/powerpc/include/asm/vphn.h delete mode 120000 tools/testing/selftests/powerpc/vphn/asm/lppaca.h create mode 120000 tools/testing/selftests/powerpc/vphn/asm/vphn.h diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 34d44cb17c874..12159e5b68887 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -6,28 +6,6 @@ #ifndef _ASM_POWERPC_LPPACA_H #define _ASM_POWERPC_LPPACA_H -/* - * The below VPHN macros are outside 
the __KERNEL__ check since these are - * used for compiling the vphn selftest in userspace - */ - -/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */ -#define VPHN_REGISTER_COUNT 6 - -/* - * 6 64-bit registers unpacked into up to 24 be32 associativity values. To - * form the complete property we have to add the length in the first cell. - */ -#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1) - -/* - * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags: - * 1 for retrieving associativity information for a guest cpu - * 2 for retrieving associativity information for a host/hypervisor cpu - */ -#define VPHN_FLAG_VCPU 1 -#define VPHN_FLAG_PCPU 2 - #ifdef __KERNEL__ /* diff --git a/arch/powerpc/include/asm/vphn.h b/arch/powerpc/include/asm/vphn.h new file mode 100644 index 0000000000000..e0970603fce2c --- /dev/null +++ b/arch/powerpc/include/asm/vphn.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_POWERPC_VPHN_H +#define _ASM_POWERPC_VPHN_H + +/* The H_HOME_NODE_ASSOCIATIVITY h_call returns 6 64-bit registers. */ +#define VPHN_REGISTER_COUNT 6 + +/* + * 6 64-bit registers unpacked into up to 24 be32 associativity values. To + * form the complete property we have to add the length in the first cell. 
+ */ +#define VPHN_ASSOC_BUFSIZE (VPHN_REGISTER_COUNT*sizeof(u64)/sizeof(u16) + 1) + +/* + * The H_HOME_NODE_ASSOCIATIVITY hcall takes two values for flags: + * 1 for retrieving associativity information for a guest cpu + * 2 for retrieving associativity information for a host/hypervisor cpu + */ +#define VPHN_FLAG_VCPU 1 +#define VPHN_FLAG_PCPU 2 + +#endif // _ASM_POWERPC_VPHN_H diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 9f73d089eac17..f6c4ace3b2219 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -34,6 +34,7 @@ #include #include #include +#include #include static int numa_enabled = 1; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 2eab323f69706..27fb656bd6ba2 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "pseries.h" diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index 3fbc2a6aa319d..e25ac52acf507 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "vas.h" diff --git a/arch/powerpc/platforms/pseries/vphn.c b/arch/powerpc/platforms/pseries/vphn.c index cca474a2c3969..3f85ece3c872b 100644 --- a/arch/powerpc/platforms/pseries/vphn.c +++ b/arch/powerpc/platforms/pseries/vphn.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include +#include /* * The associativity domain numbers are returned from the hypervisor as a diff --git a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h b/tools/testing/selftests/powerpc/vphn/asm/lppaca.h deleted file mode 120000 index 942b1d00999c1..0000000000000 --- a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h +++ /dev/null @@ -1 +0,0 @@ -../../../../../../arch/powerpc/include/asm/lppaca.h \ No newline at end of file diff --git 
a/tools/testing/selftests/powerpc/vphn/asm/vphn.h b/tools/testing/selftests/powerpc/vphn/asm/vphn.h new file mode 120000 index 0000000000000..3a0b2a00171c0 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/asm/vphn.h @@ -0,0 +1 @@ +../../../../../../arch/powerpc/include/asm/vphn.h \ No newline at end of file From 9a6c05fe9a998386a61b5e70ce07d31ec47a01a0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Aug 2023 15:53:15 +1000 Subject: [PATCH 120/135] powerpc/pseries: Move hcall_vphn() prototype into vphn.h Consolidate the two prototypes for hcall_vphn() into vphn.h. Signed-off-by: Michael Ellerman Link: https://msgid.link/20230823055317.751786-2-mpe@ellerman.id.au --- arch/powerpc/include/asm/dtl.h | 1 - arch/powerpc/include/asm/lppaca.h | 2 -- arch/powerpc/include/asm/vphn.h | 2 ++ 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h index 4bcb9f9ac7649..d6f43d149f8dc 100644 --- a/arch/powerpc/include/asm/dtl.h +++ b/arch/powerpc/include/asm/dtl.h @@ -39,6 +39,5 @@ extern rwlock_t dtl_access_lock; extern void register_dtl_buffer(int cpu); extern void alloc_dtl_buffers(unsigned long *time_limit); -extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity); #endif /* _ASM_POWERPC_DTL_H */ diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 12159e5b68887..27f0421188ec8 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -127,8 +127,6 @@ struct slb_shadow { } save_area[SLB_NUM_BOLTED]; } ____cacheline_aligned; -extern long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity); - #endif /* CONFIG_PPC_BOOK3S */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_LPPACA_H */ diff --git a/arch/powerpc/include/asm/vphn.h b/arch/powerpc/include/asm/vphn.h index e0970603fce2c..8c2f795eea685 100644 --- a/arch/powerpc/include/asm/vphn.h +++ b/arch/powerpc/include/asm/vphn.h @@ -19,4 +19,6 @@ 
#define VPHN_FLAG_VCPU 1 #define VPHN_FLAG_PCPU 2 +long hcall_vphn(unsigned long cpu, u64 flags, __be32 *associativity); + #endif // _ASM_POWERPC_VPHN_H From 1aa000667669fa855853decbb1c69e974d8ff716 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 23 Aug 2023 15:53:16 +1000 Subject: [PATCH 121/135] powerpc: Don't include lppaca.h in paca.h By adding a forward declaration for struct lppaca we can untangle paca.h and lppaca.h. Also move get_lppaca() into lppaca.h for consistency. Add includes of lppaca.h to some files that need it. Signed-off-by: Michael Ellerman Link: https://msgid.link/20230823055317.751786-3-mpe@ellerman.id.au --- arch/powerpc/include/asm/lppaca.h | 4 ++++ arch/powerpc/include/asm/paca.h | 6 +----- arch/powerpc/include/asm/paravirt.h | 1 + arch/powerpc/include/asm/plpar_wrappers.h | 1 + arch/powerpc/kvm/book3s_hv_ras.c | 1 + arch/powerpc/mm/book3s64/slb.c | 1 + arch/powerpc/xmon/xmon.c | 1 + 7 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index 27f0421188ec8..b6a63fa0965fc 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -112,6 +112,10 @@ static inline bool lppaca_shared_proc(struct lppaca *l) return !!(l->__old_status & LPPACA_OLD_SHARED_PROC); } +#ifdef CONFIG_PPC_PSERIES +#define get_lppaca() (get_paca()->lppaca_ptr) +#endif + /* * SLB shadow buffer structure as defined in the PAPR. The save_area * contains adjacent ESID and VSID pairs for each shadowed SLB. 
The diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index cb325938766a5..e667d455ecb41 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #ifdef CONFIG_PPC_BOOK3E_64 @@ -47,14 +46,11 @@ extern unsigned int debug_smp_processor_id(void); /* from linux/smp.h */ #define get_paca() local_paca #endif -#ifdef CONFIG_PPC_PSERIES -#define get_lppaca() (get_paca()->lppaca_ptr) -#endif - #define get_slb_shadow() (get_paca()->slb_shadow_ptr) struct task_struct; struct rtas_args; +struct lppaca; /* * Defines the layout of the paca. diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h index f5ba1a3c41f8e..e08513d731193 100644 --- a/arch/powerpc/include/asm/paravirt.h +++ b/arch/powerpc/include/asm/paravirt.h @@ -6,6 +6,7 @@ #include #ifdef CONFIG_PPC64 #include +#include #include #endif diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h index 8239c0af5eb2b..fe3d0ea0058ac 100644 --- a/arch/powerpc/include/asm/plpar_wrappers.h +++ b/arch/powerpc/include/asm/plpar_wrappers.h @@ -9,6 +9,7 @@ #include #include +#include #include static inline long poll_pending(void) diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c index ccfd969656306..82be6d87514b7 100644 --- a/arch/powerpc/kvm/book3s_hv_ras.c +++ b/arch/powerpc/kvm/book3s_hv_ras.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 6956f637a38c1..f2708c8629a52 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 78453b9b1ba0e..6c6f90f1da94f 100644 --- a/arch/powerpc/xmon/xmon.c +++ 
b/arch/powerpc/xmon/xmon.c @@ -58,6 +58,7 @@ #ifdef CONFIG_PPC64 #include #include +#include #endif #include "nonstdio.h" From eac030b22ea12cdfcbb2e941c21c03964403c63f Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 23 Aug 2023 15:53:17 +1000 Subject: [PATCH 122/135] powerpc/pseries: Rework lppaca_shared_proc() to avoid DEBUG_PREEMPT lppaca_shared_proc() takes a pointer to the lppaca which is typically accessed through get_lppaca(). With DEBUG_PREEMPT enabled, this leads to checking if preemption is enabled, for example: BUG: using smp_processor_id() in preemptible [00000000] code: grep/10693 caller is lparcfg_data+0x408/0x19a0 CPU: 4 PID: 10693 Comm: grep Not tainted 6.5.0-rc3 #2 Call Trace: dump_stack_lvl+0x154/0x200 (unreliable) check_preemption_disabled+0x214/0x220 lparcfg_data+0x408/0x19a0 ... This isn't actually a problem however, as it does not matter which lppaca is accessed, the shared proc state will be the same. vcpudispatch_stats_procfs_init() already works around this by disabling preemption, but the lparcfg code does not, erroring any time /proc/powerpc/lparcfg is accessed with DEBUG_PREEMPT enabled. Instead of disabling preemption on the caller side, rework lppaca_shared_proc() to not take a pointer and instead directly access the lppaca, bypassing any potential preemption checks. 
Fixes: f13c13a00512 ("powerpc: Stop using non-architected shared_proc field in lppaca") Signed-off-by: Russell Currey [mpe: Rework to avoid needing a definition in paca.h and lppaca.h] Signed-off-by: Michael Ellerman Link: https://msgid.link/20230823055317.751786-4-mpe@ellerman.id.au --- arch/powerpc/include/asm/lppaca.h | 11 +++++++++-- arch/powerpc/platforms/pseries/lpar.c | 10 +--------- arch/powerpc/platforms/pseries/lparcfg.c | 4 ++-- arch/powerpc/platforms/pseries/setup.c | 2 +- drivers/cpuidle/cpuidle-pseries.c | 8 +------- 5 files changed, 14 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/lppaca.h b/arch/powerpc/include/asm/lppaca.h index b6a63fa0965fc..61ec2447dabf5 100644 --- a/arch/powerpc/include/asm/lppaca.h +++ b/arch/powerpc/include/asm/lppaca.h @@ -23,6 +23,7 @@ #include #include #include +#include /* * The lppaca is the "virtual processor area" registered with the hypervisor, @@ -105,14 +106,20 @@ struct lppaca { */ #define LPPACA_OLD_SHARED_PROC 2 -static inline bool lppaca_shared_proc(struct lppaca *l) +#ifdef CONFIG_PPC_PSERIES +/* + * All CPUs should have the same shared proc value, so directly access the PACA + * to avoid false positives from DEBUG_PREEMPT. + */ +static inline bool lppaca_shared_proc(void) { + struct lppaca *l = local_paca->lppaca_ptr; + if (!firmware_has_feature(FW_FEATURE_SPLPAR)) return false; return !!(l->__old_status & LPPACA_OLD_SHARED_PROC); } -#ifdef CONFIG_PPC_PSERIES #define get_lppaca() (get_paca()->lppaca_ptr) #endif diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 27fb656bd6ba2..f2cb62148f36f 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -640,16 +640,8 @@ static const struct proc_ops vcpudispatch_stats_freq_proc_ops = { static int __init vcpudispatch_stats_procfs_init(void) { - /* - * Avoid smp_processor_id while preemptible. All CPUs should have - * the same value for lppaca_shared_proc. 
- */ - preempt_disable(); - if (!lppaca_shared_proc(get_lppaca())) { - preempt_enable(); + if (!lppaca_shared_proc()) return 0; - } - preempt_enable(); if (!proc_create("powerpc/vcpudispatch_stats", 0600, NULL, &vcpudispatch_stats_proc_ops)) diff --git a/arch/powerpc/platforms/pseries/lparcfg.c b/arch/powerpc/platforms/pseries/lparcfg.c index 8acc705095209..1c151d77e74b3 100644 --- a/arch/powerpc/platforms/pseries/lparcfg.c +++ b/arch/powerpc/platforms/pseries/lparcfg.c @@ -206,7 +206,7 @@ static void parse_ppp_data(struct seq_file *m) ppp_data.active_system_procs); /* pool related entries are appropriate for shared configs */ - if (lppaca_shared_proc(get_lppaca())) { + if (lppaca_shared_proc()) { unsigned long pool_idle_time, pool_procs; seq_printf(m, "pool=%d\n", ppp_data.pool_num); @@ -560,7 +560,7 @@ static int pseries_lparcfg_data(struct seq_file *m, void *v) partition_potential_processors); seq_printf(m, "shared_processor_mode=%d\n", - lppaca_shared_proc(get_lppaca())); + lppaca_shared_proc()); #ifdef CONFIG_PPC_64S_HASH_MMU if (!radix_enabled()) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index bb0a9aeb50f90..ecea85c74c43f 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -849,7 +849,7 @@ static void __init pSeries_setup_arch(void) if (firmware_has_feature(FW_FEATURE_LPAR)) { vpa_init(boot_cpuid); - if (lppaca_shared_proc(get_lppaca())) { + if (lppaca_shared_proc()) { static_branch_enable(&shared_processor); pv_spinlocks_init(); #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index a7d33f3ee01e7..14db9b7d985d1 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -414,13 +414,7 @@ static int __init pseries_idle_probe(void) return -ENODEV; if (firmware_has_feature(FW_FEATURE_SPLPAR)) { - /* - * Use local_paca instead of get_lppaca() since - * preemption 
is not disabled, and it is not required in - * fact, since lppaca_ptr does not need to be the value - * associated to the current CPU, it can be from any CPU. - */ - if (lppaca_shared_proc(local_paca->lppaca_ptr)) { + if (lppaca_shared_proc()) { cpuidle_state_table = shared_states; max_idle_state = ARRAY_SIZE(shared_states); } else { From 1eafbd8764b10798934344bd40395b27cec63145 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 22 Aug 2023 00:09:49 +1000 Subject: [PATCH 123/135] powerpc/powermac: Fix unused function warning Clang reports: arch/powerpc/platforms/powermac/feature.c:137:19: error: unused function 'simple_feature_tweak' It's only used inside the #ifndef CONFIG_PPC64 block, so move it in there to fix the warning. While at it drop the inline, the compiler will decide whether it should be inlined or not. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202308181501.AR5HMDWC-lkp@intel.com/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20230821140949.491881-1-mpe@ellerman.id.au --- arch/powerpc/platforms/powermac/feature.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c index ed472b797e28a..ae62d432db8bb 100644 --- a/arch/powerpc/platforms/powermac/feature.c +++ b/arch/powerpc/platforms/powermac/feature.c @@ -134,8 +134,10 @@ static struct pmac_mb_def pmac_mb; * Here are the chip specific feature functions */ -static inline int simple_feature_tweak(struct device_node *node, int type, - int reg, u32 mask, int value) +#ifndef CONFIG_PPC64 + +static int simple_feature_tweak(struct device_node *node, int type, int reg, + u32 mask, int value) { struct macio_chip* macio; unsigned long flags; @@ -154,8 +156,6 @@ static inline int simple_feature_tweak(struct device_node *node, int type, return 0; } -#ifndef CONFIG_PPC64 - static long ohare_htw_scc_enable(struct device_node *node, long param, long value) { 
From 50832720ec54c39ab189cd5e057aec1c514978ce Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 30 Mar 2023 10:43:08 +1100 Subject: [PATCH 124/135] powerpc/64s: Move CPU -mtune options into Kconfig Currently the -mtune options are set in the Makefile, depending on what the compiler supports. One downside of doing it that way is that the chosen -mtune option is not recorded in the .config. Another downside is that if there's ever a need to do more complicated logic to calculate the correct option, that gets messy in the Makefile. So move the determination of which -mtune option to use into Kconfig logic. Signed-off-by: Michael Ellerman Link: https://msgid.link/20230329234308.2215833-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 4 +--- arch/powerpc/platforms/Kconfig.cputype | 7 +++++++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index cbd34459cce08..f19dbaa1d5413 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -157,9 +157,7 @@ endif CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU) AFLAGS-$(CONFIG_TARGET_CPU_BOOL) += -mcpu=$(CONFIG_TARGET_CPU) -CFLAGS-$(CONFIG_POWERPC64_CPU) += $(call cc-option,-mtune=power10, \ - $(call cc-option,-mtune=power9, \ - $(call cc-option,-mtune=power8))) +CFLAGS-y += $(CONFIG_TUNE_CPU) asinstr := $(call as-instr,lis 9$(comma)foo@high,-DHAVE_AS_ATHIGH=1) diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 45fd975ef5212..db39eca3b1be7 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -275,6 +275,13 @@ config TARGET_CPU default "e500mc" if E500MC_CPU default "powerpc" if POWERPC_CPU +config TUNE_CPU + string + depends on POWERPC64_CPU + default "-mtune=power10" if $(cc-option,-mtune=power10) + default "-mtune=power9" if $(cc-option,-mtune=power9) + default "-mtune=power8" if $(cc-option,-mtune=power8) + config PPC_BOOK3S def_bool y depends 
on PPC_BOOK3S_32 || PPC_BOOK3S_64 From 664ec38673bef18bbcc4ede02274c8977470823f Mon Sep 17 00:00:00 2001 From: Jialin Zhang Date: Tue, 15 Aug 2023 10:33:03 +0800 Subject: [PATCH 125/135] powerpc/eeh: Use pci_dev_id() to simplify the code PCI core API pci_dev_id() can be used to get the BDF number for a pci device. We don't need to compose it manually. Use pci_dev_id() to simplify the code a little bit. Signed-off-by: Jialin Zhang Signed-off-by: Michael Ellerman Link: https://msgid.link/20230815023303.3515503-1-zhangjialin11@huawei.com --- arch/powerpc/platforms/powernv/eeh-powernv.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c index a83cb679dd59f..af3a5d37a1496 100644 --- a/arch/powerpc/platforms/powernv/eeh-powernv.c +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c @@ -855,8 +855,7 @@ static int pnv_eeh_bridge_reset(struct pci_dev *pdev, int option) struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct device_node *dn = pci_device_to_OF_node(pdev); - uint64_t id = PCI_SLOT_ID(phb->opal_id, - (pdev->bus->number << 8) | pdev->devfn); + uint64_t id = PCI_SLOT_ID(phb->opal_id, pci_dev_id(pdev)); uint8_t scope; int64_t rc; From 11073886cc4a2746845e8d113cadec2578c85033 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 5 Jul 2023 16:57:43 +0200 Subject: [PATCH 126/135] powerpc: dts: add missing space before { Add missing whitespace between node name/label and opening {.
Signed-off-by: Krzysztof Kozlowski Signed-off-by: Michael Ellerman Link: https://msgid.link/20230705145743.292855-1-krzysztof.kozlowski@linaro.org --- arch/powerpc/boot/dts/fsl/c293si-post.dtsi | 12 ++++++------ arch/powerpc/boot/dts/fsl/p1022rdk.dts | 10 +++++----- arch/powerpc/boot/dts/fsl/p1022si-post.dtsi | 2 +- arch/powerpc/boot/dts/fsl/p3041ds.dts | 4 ++-- arch/powerpc/boot/dts/fsl/p5040ds.dts | 2 +- arch/powerpc/boot/dts/fsl/t4240qds.dts | 2 +- arch/powerpc/boot/dts/mpc5121.dtsi | 2 +- arch/powerpc/boot/dts/mpc5125twr.dts | 2 +- 8 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi index bec0fc36849dd..f208fb8f64b37 100644 --- a/arch/powerpc/boot/dts/fsl/c293si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/c293si-post.dtsi @@ -124,10 +124,10 @@ reg = <0x80000 0x20000>; ranges = <0x0 0x80000 0x20000>; - jr@1000{ + jr@1000 { interrupts = <45 2 0 0>; }; - jr@2000{ + jr@2000 { interrupts = <57 2 0 0>; }; }; @@ -140,10 +140,10 @@ reg = <0xa0000 0x20000>; ranges = <0x0 0xa0000 0x20000>; - jr@1000{ + jr@1000 { interrupts = <49 2 0 0>; }; - jr@2000{ + jr@2000 { interrupts = <50 2 0 0>; }; }; @@ -156,10 +156,10 @@ reg = <0xc0000 0x20000>; ranges = <0x0 0xc0000 0x20000>; - jr@1000{ + jr@1000 { interrupts = <55 2 0 0>; }; - jr@2000{ + jr@2000 { interrupts = <56 2 0 0>; }; }; diff --git a/arch/powerpc/boot/dts/fsl/p1022rdk.dts b/arch/powerpc/boot/dts/fsl/p1022rdk.dts index 29e8af1e3711d..4261c2f7e4b38 100644 --- a/arch/powerpc/boot/dts/fsl/p1022rdk.dts +++ b/arch/powerpc/boot/dts/fsl/p1022rdk.dts @@ -60,23 +60,23 @@ compatible = "st,m41t62"; reg = <0x68>; }; - adt7461@4c{ + adt7461@4c { compatible = "adi,adt7461"; reg = <0x4c>; }; - zl6100@21{ + zl6100@21 { compatible = "isil,zl6100"; reg = <0x21>; }; - zl6100@24{ + zl6100@24 { compatible = "isil,zl6100"; reg = <0x24>; }; - zl6100@26{ + zl6100@26 { compatible = "isil,zl6100"; reg = <0x26>; }; - zl6100@29{ + zl6100@29 { 
compatible = "isil,zl6100"; reg = <0x29>; }; diff --git a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi index 5f51b7bfc0640..093e4e3ed3689 100644 --- a/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi +++ b/arch/powerpc/boot/dts/fsl/p1022si-post.dtsi @@ -238,7 +238,7 @@ fsl,has-rstcr; }; - power@e0070{ + power@e0070 { compatible = "fsl,mpc8536-pmc", "fsl,mpc8548-pmc"; reg = <0xe0070 0x20>; }; diff --git a/arch/powerpc/boot/dts/fsl/p3041ds.dts b/arch/powerpc/boot/dts/fsl/p3041ds.dts index 6f5f7283c533b..ca0e0272ac626 100644 --- a/arch/powerpc/boot/dts/fsl/p3041ds.dts +++ b/arch/powerpc/boot/dts/fsl/p3041ds.dts @@ -41,7 +41,7 @@ #size-cells = <2>; interrupt-parent = <&mpic>; - aliases{ + aliases { phy_rgmii_0 = &phy_rgmii_0; phy_rgmii_1 = &phy_rgmii_1; phy_sgmii_1c = &phy_sgmii_1c; @@ -165,7 +165,7 @@ }; }; - fman@400000{ + fman@400000 { ethernet@e0000 { phy-handle = <&phy_sgmii_1c>; phy-connection-type = "sgmii"; diff --git a/arch/powerpc/boot/dts/fsl/p5040ds.dts b/arch/powerpc/boot/dts/fsl/p5040ds.dts index 30850b3228e08..5cfc689ee474e 100644 --- a/arch/powerpc/boot/dts/fsl/p5040ds.dts +++ b/arch/powerpc/boot/dts/fsl/p5040ds.dts @@ -41,7 +41,7 @@ #size-cells = <2>; interrupt-parent = <&mpic>; - aliases{ + aliases { phy_sgmii_slot2_1c = &phy_sgmii_slot2_1c; phy_sgmii_slot2_1d = &phy_sgmii_slot2_1d; phy_sgmii_slot2_1e = &phy_sgmii_slot2_1e; diff --git a/arch/powerpc/boot/dts/fsl/t4240qds.dts b/arch/powerpc/boot/dts/fsl/t4240qds.dts index c0913ac5aaadb..128b5798bb972 100644 --- a/arch/powerpc/boot/dts/fsl/t4240qds.dts +++ b/arch/powerpc/boot/dts/fsl/t4240qds.dts @@ -41,7 +41,7 @@ #size-cells = <2>; interrupt-parent = <&mpic>; - aliases{ + aliases { phy_rgmii1 = &phyrgmii1; phy_rgmii2 = &phyrgmii2; phy_sgmii3 = &phy3; diff --git a/arch/powerpc/boot/dts/mpc5121.dtsi b/arch/powerpc/boot/dts/mpc5121.dtsi index 3f66b91a8e3ce..d3fc8062fbcd7 100644 --- a/arch/powerpc/boot/dts/mpc5121.dtsi +++ b/arch/powerpc/boot/dts/mpc5121.dtsi @@ 
-140,7 +140,7 @@ }; /* Power Management Controller */ - pmc@1000{ + pmc@1000 { compatible = "fsl,mpc5121-pmc"; reg = <0x1000 0x100>; interrupts = <83 0x8>; diff --git a/arch/powerpc/boot/dts/mpc5125twr.dts b/arch/powerpc/boot/dts/mpc5125twr.dts index 0bd2acc0401d9..ee090709aa3a0 100644 --- a/arch/powerpc/boot/dts/mpc5125twr.dts +++ b/arch/powerpc/boot/dts/mpc5125twr.dts @@ -104,7 +104,7 @@ clock-names = "osc"; }; - pmc@1000{ // Power Management Controller + pmc@1000 { // Power Management Controller compatible = "fsl,mpc5121-pmc"; reg = <0x1000 0x100>; interrupts = <83 0x2>; From 750bd41aeaeb1f0e0128aa4f8fcd6dd759713641 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 9 May 2023 19:15:59 +1000 Subject: [PATCH 127/135] powerpc/pseries: Fix hcall tracepoints with JUMP_LABEL=n With JUMP_LABEL=n, hcall_tracepoint_refcount's address is being tested instead of its value. This results in the tracing slowpath always being taken unnecessarily. Fixes: 9a10ccb29c0a2 ("powerpc/pseries: move hcall_tracepoint_refcount out of .toc") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20230509091600.70994-1-npiggin@gmail.com --- arch/powerpc/platforms/pseries/hvCall.S | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index 35254ac7af5ee..ca0674b0b683e 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -91,6 +91,7 @@ BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ LOAD_REG_ADDR(r12, hcall_tracepoint_refcount) ; \ + ld r12,0(r12); \ std r12,32(r1); \ cmpdi r12,0; \ bne- LABEL; \ From 61d7ebe0376e2640ba77be16e186b1a6c77eb3f7 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 9 May 2023 19:16:00 +1000 Subject: [PATCH 128/135] powerpc/pseries: Remove unused hcall tracing instruction When JUMP_LABEL=n, the tracepoint refcount test in the pre-call stores the refcount value to the stack, so the same 
value can be used for the post-call (presumably to avoid racing with the value concurrently changing). On little-endian (ELFv2) that might have just worked by luck, because 32(r1) is STK_PARAM(R3) there and so the value save gets clobbered by the tracing code when it's non-zero, but fortunately r3 is the hcall number and 0 is an invalid hcall number so it should get clobbered by another non-zero value. In any case, commit cc1adb5f32557 ("powerpc/pseries: Use jump labels for hcall tracepoints") removed the code that actually used the value stored, so now it's just dead code. It's fragile to be storing to the stack like this, and confusing. Better remove it. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20230509091600.70994-2-npiggin@gmail.com --- arch/powerpc/platforms/pseries/hvCall.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S index ca0674b0b683e..bae45b358a094 100644 --- a/arch/powerpc/platforms/pseries/hvCall.S +++ b/arch/powerpc/platforms/pseries/hvCall.S @@ -92,7 +92,6 @@ BEGIN_FTR_SECTION; \ END_FTR_SECTION(0, 1); \ LOAD_REG_ADDR(r12, hcall_tracepoint_refcount) ; \ ld r12,0(r12); \ - std r12,32(r1); \ cmpdi r12,0; \ bne- LABEL; \ 1: From cdebfd27292ecdebe7d493830354e302368b3188 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Wed, 5 Jul 2023 12:00:56 +0930 Subject: [PATCH 129/135] powerpc/config: Disable SLAB_DEBUG_ON in skiroot In 5.10 commit 5e84dd547bce ("powerpc/configs/skiroot: Enable some more hardening options") set SLUB_DEBUG_ON. When 5.14 came around, commit 792702911f58 ("slub: force on no_hash_pointers when slub_debug is enabled") print all the pointers when SLUB_DEBUG_ON is set. This was fine, but in 5.12 commit 5ead723a20e0 ("lib/vsprintf: no_hash_pointers prints all addresses as unhashed") added the warning at boot. Disable SLAB_DEBUG_ON as we don't want the nasty warning. 
We have CONFIG_EXPERT so SLUB_DEBUG is enabled. We do lose the settings in DEBUG_DEFAULT_FLAGS, but it's not clear that these should have been always-on anyway. Signed-off-by: Joel Stanley Signed-off-by: Michael Ellerman Link: https://msgid.link/20230705023056.16273-1-joel@jms.id.au --- arch/powerpc/configs/skiroot_defconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 71cfb990a74f3..8d3eacb50d560 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -289,7 +289,6 @@ CONFIG_LIBCRC32C=y # CONFIG_XZ_DEC_SPARC is not set CONFIG_PRINTK_TIME=y CONFIG_MAGIC_SYSRQ=y -CONFIG_SLUB_DEBUG_ON=y CONFIG_SCHED_STACK_END_CHECK=y CONFIG_DEBUG_STACKOVERFLOW=y CONFIG_PANIC_ON_OOPS=y From b9bbbf4979073d5536b7650decd37fcb901e6556 Mon Sep 17 00:00:00 2001 From: Liang He Date: Wed, 22 Mar 2023 11:04:23 +0800 Subject: [PATCH 130/135] powerpc/mpc5xxx: Add missing fwnode_handle_put() In mpc5xxx_fwnode_get_bus_frequency(), we should add fwnode_handle_put() when breaking out of the iteration fwnode_for_each_parent_node() as it will automatically increase and decrease the refcounter.
Fixes: de06fba62af6 ("powerpc/mpc5xxx: Switch mpc5xxx_get_bus_frequency() to use fwnode") Signed-off-by: Liang He Signed-off-by: Michael Ellerman Link: https://msgid.link/20230322030423.1855440-1-windhl@126.com --- arch/powerpc/sysdev/mpc5xxx_clocks.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/mpc5xxx_clocks.c b/arch/powerpc/sysdev/mpc5xxx_clocks.c index c5bf7e1b37804..58cee28e23992 100644 --- a/arch/powerpc/sysdev/mpc5xxx_clocks.c +++ b/arch/powerpc/sysdev/mpc5xxx_clocks.c @@ -25,8 +25,10 @@ unsigned long mpc5xxx_fwnode_get_bus_frequency(struct fwnode_handle *fwnode) fwnode_for_each_parent_node(fwnode, parent) { ret = fwnode_property_read_u32(parent, "bus-frequency", &bus_freq); - if (!ret) + if (!ret) { + fwnode_handle_put(parent); return bus_freq; + } } return 0; From c37b6908f7b2bd24dcaaf14a180e28c9132b9c58 Mon Sep 17 00:00:00 2001 From: Russell Currey Date: Wed, 22 Mar 2023 14:53:22 +1100 Subject: [PATCH 131/135] powerpc/iommu: Fix notifiers being shared by PCI and VIO buses fail_iommu_setup() registers the fail_iommu_bus_notifier struct to both PCI and VIO buses. struct notifier_block is a linked list node, so this causes any notifiers later registered to either bus type to also be registered to the other since they share the same node. This causes issues in (at least) the vgaarb code, which registers a notifier for PCI buses. 
pci_notify() ends up being called on a vio device, converted with to_pci_dev() even though it's not a PCI device, and finally makes a bad access in vga_arbiter_add_pci_device() as discovered with KASAN: BUG: KASAN: slab-out-of-bounds in vga_arbiter_add_pci_device+0x60/0xe00 Read of size 4 at addr c000000264c26fdc by task swapper/0/1 Call Trace: dump_stack_lvl+0x1bc/0x2b8 (unreliable) print_report+0x3f4/0xc60 kasan_report+0x244/0x698 __asan_load4+0xe8/0x250 vga_arbiter_add_pci_device+0x60/0xe00 pci_notify+0x88/0x444 notifier_call_chain+0x104/0x320 blocking_notifier_call_chain+0xa0/0x140 device_add+0xac8/0x1d30 device_register+0x58/0x80 vio_register_device_node+0x9ac/0xce0 vio_bus_scan_register_devices+0xc4/0x13c __machine_initcall_pseries_vio_device_init+0x94/0xf0 do_one_initcall+0x12c/0xaa8 kernel_init_freeable+0xa48/0xba8 kernel_init+0x64/0x400 ret_from_kernel_thread+0x5c/0x64 Fix this by creating separate notifier_block structs for each bus type. Fixes: d6b9a81b2a45 ("powerpc: IOMMU fault injection") Reported-by: Nageswara R Sastry Signed-off-by: Russell Currey Tested-by: Nageswara R Sastry Reviewed-by: Andrew Donnellan [mpe: Add #ifdef to fix CONFIG_IBMVIO=n build] Signed-off-by: Michael Ellerman Link: https://msgid.link/20230322035322.328709-1-ruscur@russell.cc --- arch/powerpc/kernel/iommu.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index c52449ae6936a..14251bc5219eb 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -172,17 +172,28 @@ static int fail_iommu_bus_notify(struct notifier_block *nb, return 0; } -static struct notifier_block fail_iommu_bus_notifier = { +/* + * PCI and VIO buses need separate notifier_block structs, since they're linked + * list nodes. Sharing a notifier_block would mean that any notifiers later + * registered for PCI buses would also get called by VIO buses and vice versa. 
+ */ +static struct notifier_block fail_iommu_pci_bus_notifier = { .notifier_call = fail_iommu_bus_notify }; +#ifdef CONFIG_IBMVIO +static struct notifier_block fail_iommu_vio_bus_notifier = { + .notifier_call = fail_iommu_bus_notify +}; +#endif + static int __init fail_iommu_setup(void) { #ifdef CONFIG_PCI - bus_register_notifier(&pci_bus_type, &fail_iommu_bus_notifier); + bus_register_notifier(&pci_bus_type, &fail_iommu_pci_bus_notifier); #endif #ifdef CONFIG_IBMVIO - bus_register_notifier(&vio_bus_type, &fail_iommu_bus_notifier); + bus_register_notifier(&vio_bus_type, &fail_iommu_vio_bus_notifier); #endif return 0; From f1424755db913c5971686537381588261cdfd1ee Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Aug 2023 13:16:57 +0530 Subject: [PATCH 132/135] powerpc/mm/book3s64: Fix build error with SPARSEMEM disabled With CONFIG_SPARSEMEM disabled the below kernel build error is observed. arch/powerpc/mm/init_64.c:477:38: error: use of undeclared identifier 'SECTION_SIZE_BITS' CONFIG_MEMORY_HOTPLUG depends on CONFIG_SPARSEMEM and it is more clear to describe the code dependency in terms of MEMORY_HOTPLUG. Outside memory hotplug the kernel uses memory_block_size for kernel directmap. 
Instead of depending on SECTION_SIZE_BITS to compute the direct map page size, add a new #define which defaults to 16M (same as the existing SECTION_SIZE). Fixes: 4d15721177d5 ("powerpc/mm: Cleanup memory block size probing") Signed-off-by: "Aneesh Kumar K.V" Closes: https://lore.kernel.org/oe-kbuild-all/202308251532.k9PpWEAD-lkp@intel.com/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20230828074658.59553-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/init_64.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index d74d4a441616a..9f76e109f96a7 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -472,12 +472,23 @@ static int __init dt_scan_mmu_pid_width(unsigned long node, return 1; } +/* + * Outside hotplug the kernel uses this value to map the kernel direct map + * with radix. To be compatible with older kernels, let's keep this value + * as 16M which is also SECTION_SIZE with SPARSEMEM. We can ideally map + * things with 1GB size in the case where we don't support hotplug.
+ */ +#ifndef CONFIG_MEMORY_HOTPLUG +#define DEFAULT_MEMORY_BLOCK_SIZE SZ_16M +#else +#define DEFAULT_MEMORY_BLOCK_SIZE MIN_MEMORY_BLOCK_SIZE +#endif + static void update_memory_block_size(unsigned long *block_size, unsigned long mem_size) { - unsigned long section_size = 1UL << SECTION_SIZE_BITS; - - for (; *block_size > section_size; *block_size >>= 2) { + unsigned long min_memory_block_size = DEFAULT_MEMORY_BLOCK_SIZE; + for (; *block_size > min_memory_block_size; *block_size >>= 2) { if ((mem_size & *block_size) == 0) break; } @@ -507,7 +518,7 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname, /* * Nothing in the device tree */ - *block_size = MIN_MEMORY_BLOCK_SIZE; + *block_size = DEFAULT_MEMORY_BLOCK_SIZE; else *block_size = of_read_number(prop, dt_root_size_cells); /* From 4c33bf147249ebbf3dded016996a8a24c5737254 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 28 Aug 2023 13:16:58 +0530 Subject: [PATCH 133/135] powerpc/mm/book3s64: Use 256M as the upper limit with coherent device memory attached Commit 4d15721177d5 ("powerpc/mm: Cleanup memory block size probing") used 256MB as the memory block size when we have ibm,coherent-device-memory device tree node present. Instead of returning with 256MB memory block size, continue to check the rest of the memory regions and make sure we can still map them using a 256MB memory block size. 
Fixes: 4d15721177d5 ("powerpc/mm: Cleanup memory block size probing") Signed-off-by: "Aneesh Kumar K.V" Signed-off-by: Michael Ellerman Link: https://msgid.link/20230828074658.59553-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/mm/init_64.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 9f76e109f96a7..70f8e9ce412e9 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -569,8 +569,12 @@ static int __init probe_memory_block_size(unsigned long node, const char *uname, */ compatible = of_get_flat_dt_prop(node, "compatible", NULL); if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) { - *block_size = SZ_256M; - return 1; + if (*block_size > SZ_256M) + *block_size = SZ_256M; + /* + * We keep 256M as the upper limit with GPU present. + */ + return 0; } } /* continue looking for other memory device types */ From 90bae4d99beb1f31d8bde7c438a36e8875ae6090 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 28 Aug 2023 13:39:06 -0700 Subject: [PATCH 134/135] powerpc/xmon: Reapply "Relax frame size for clang" This is a manual revert of commit 7f3c5d099b6f ("Revert "powerpc/xmon: Relax frame size for clang"") but using ccflags-$(CONFIG_CC_IS_CLANG) which is shorter. Turns out that this is still reproducible under specific compiler versions (mea culpa: I did not test every supported version of clang), and a few randconfig bots even found it. We'll have to revisit this again in the future; for now, back this out.
Reported-by: Nathan Chancellor Closes: https://github.com/ClangBuiltLinux/linux/issues/252#issuecomment-1690371256 Reported-by: kernel test robot Closes: https://lore.kernel.org/llvm/202308260344.Vc4Giuk7-lkp@intel.com/ Suggested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Signed-off-by: Nick Desaulniers Signed-off-by: Michael Ellerman Link: https://msgid.link/20230828-ppc_rerevert-v2-1-46b71a3656c6@google.com --- arch/powerpc/xmon/Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index 7705aa74a24d3..682c7c0a6f77d 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -12,6 +12,10 @@ ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE) ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC) +# Clang stores addresses on the stack causing the frame size to blow +# out. See https://github.com/ClangBuiltLinux/linux/issues/252 +ccflags-$(CONFIG_CC_IS_CLANG) += -Wframe-larger-than=4096 + obj-y += xmon.o nonstdio.o spr_access.o xmon_bpts.o ifdef CONFIG_XMON_DISASSEMBLY From 85a616416e9e01db0bfa92f26457e92642e2236b Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 29 Aug 2023 15:58:37 -0700 Subject: [PATCH 135/135] macintosh/ams: linux/platform_device.h is needed ams.h uses struct platform_device, so the header should be used to prevent build errors: drivers/macintosh/ams/ams-input.c: In function 'ams_input_enable': drivers/macintosh/ams/ams-input.c:68:45: error: invalid use of undefined type 'struct platform_device' 68 | input->dev.parent = &ams_info.of_dev->dev; drivers/macintosh/ams/ams-input.c: In function 'ams_input_init': drivers/macintosh/ams/ams-input.c:146:51: error: invalid use of undefined type 'struct platform_device' 146 | return device_create_file(&ams_info.of_dev->dev, &dev_attr_joystick); drivers/macintosh/ams/ams-input.c: In function 'ams_input_exit': drivers/macintosh/ams/ams-input.c:151:44: error: invalid use of undefined type 'struct platform_device' 
151 | device_remove_file(&ams_info.of_dev->dev, &dev_attr_joystick); drivers/macintosh/ams/ams-input.c: In function 'ams_input_init': drivers/macintosh/ams/ams-input.c:147:1: error: control reaches end of non-void function [-Werror=return-type] 147 | } Fixes: 233d687d1b78 ("macintosh: Explicitly include correct DT includes") Signed-off-by: Randy Dunlap Signed-off-by: Michael Ellerman Link: https://msgid.link/20230829225837.15520-1-rdunlap@infradead.org --- drivers/macintosh/ams/ams.h | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/macintosh/ams/ams.h b/drivers/macintosh/ams/ams.h index 2c159c8844c19..4b93c766a5516 100644 --- a/drivers/macintosh/ams/ams.h +++ b/drivers/macintosh/ams/ams.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include