From 200e4048468d0e83fdc78c21edf8235deca0901c Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Thu, 12 Sep 2024 10:45:02 +0100 Subject: [PATCH 1/6] drivers: irq-bcm2836: preserve unrelated bits in LOCAL_GPU_ROUTING Interrupts are dispatched round-robin but doing so trampled FIQ routing. Taking a FIQ on a core without a handler installed is fatal. Only modify bits 1:0 which are the IRQ route bits. Signed-off-by: Jonathan Bell --- drivers/irqchip/irq-bcm2836.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index 42660f14aaf67a..3766ab72da06ab 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c @@ -94,12 +94,17 @@ void bcm2836_arm_irqchip_spin_gpu_irq(void) u32 i; void __iomem *gpurouting = (intc.base + LOCAL_GPU_ROUTING); u32 routing_val = readl(gpurouting); + u32 irq_route; + + /* Preserve FIQ routing bits */ + irq_route = routing_val & 0x3; + routing_val &= ~0x3; for (i = 1; i <= 3; i++) { - u32 new_routing_val = (routing_val + i) & 3; + irq_route = (irq_route + i) & 0x3; - if (cpu_active(new_routing_val)) { - writel(new_routing_val, gpurouting); + if (cpu_active(irq_route)) { + writel(irq_route | routing_val, gpurouting); return; } } From 4f4cf19127850765882fb8870176f2f84c1aa9fc Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Thu, 12 Sep 2024 11:09:30 +0100 Subject: [PATCH 2/6] drivers: irq-bcm283x: swizzle interrupts on ARMv7 too BCM2836 with Cortex-A7 cores has almost the same ARM_LOCAL interrupt routing logic as BCM2837, so relax the compile guard to CONFIG_SMP not CONFIG_ARM64. Signed-off-by: Jonathan Bell --- drivers/irqchip/irq-bcm2835.c | 4 ++-- drivers/irqchip/irq-bcm2836.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/irqchip/irq-bcm2835.c b/drivers/irqchip/irq-bcm2835.c index 8e995ade76d6bd..68a9b046e2c3d0 100644 --- a/drivers/irqchip/irq-bcm2835.c +++ b/drivers/irqchip/irq-bcm2835.c @@ -152,7 +152,7 @@ static void armctrl_unmask_irq(struct irq_data *d) } } -#ifdef CONFIG_ARM64 +#if defined(CONFIG_SMP) void bcm2836_arm_irqchip_spin_gpu_irq(void); static void armctrl_ack_irq(struct irq_data *d) @@ -166,7 +166,7 @@ static struct irq_chip armctrl_chip = { .name = "ARMCTRL-level", .irq_mask = armctrl_mask_irq, .irq_unmask = armctrl_unmask_irq, -#ifdef CONFIG_ARM64 +#if defined(CONFIG_SMP) .irq_ack = armctrl_ack_irq #endif }; diff --git a/drivers/irqchip/irq-bcm2836.c b/drivers/irqchip/irq-bcm2836.c index 3766ab72da06ab..00aea693d4b6d7 100644 --- a/drivers/irqchip/irq-bcm2836.c +++ b/drivers/irqchip/irq-bcm2836.c @@ -87,7 +87,7 @@ static void bcm2836_arm_irqchip_unmask_gpu_irq(struct irq_data *d) { } -#ifdef CONFIG_ARM64 +#if defined(CONFIG_SMP) void bcm2836_arm_irqchip_spin_gpu_irq(void) { From aae05ae4f7bee41996ac12eea35015382255f189 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Thu, 12 Sep 2024 13:03:45 +0100 Subject: [PATCH 3/6] drivers: dwc_otg: move FIQ locking functions to header file Also declare as static inline, as they should be. Signed-off-by: Jonathan Bell --- drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.c | 64 -------------------- drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h | 70 ++++++++++++++++++++-- 2 files changed, 65 insertions(+), 69 deletions(-) diff --git a/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.c b/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.c index 6d9faea2146210..c63a2813cc7c39 100644 --- a/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.c +++ b/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.c @@ -74,70 +74,6 @@ void notrace _fiq_print(enum fiq_debug_level dbg_lvl, volatile struct fiq_state } } - -#ifdef CONFIG_ARM64 - -inline void fiq_fsm_spin_lock(fiq_lock_t *lock) -{ - spin_lock((spinlock_t *)lock); -} - -inline void fiq_fsm_spin_unlock(fiq_lock_t *lock) -{ - spin_unlock((spinlock_t *)lock); -} - -#else - -/** - * fiq_fsm_spin_lock() - ARMv6+ bare bones spinlock - * Must be called with local interrupts and FIQ disabled. - */ -#if defined(CONFIG_ARCH_BCM2835) && defined(CONFIG_SMP) -inline void fiq_fsm_spin_lock(fiq_lock_t *lock) -{ - unsigned long tmp; - uint32_t newval; - fiq_lock_t lockval; - /* Nested locking, yay. If we are on the same CPU as the fiq, then the disable - * will be sufficient. If we are on a different CPU, then the lock protects us. */ - prefetchw(&lock->slock); - asm volatile ( - "1: ldrex %0, [%3]\n" - " add %1, %0, %4\n" - " strex %2, %1, [%3]\n" - " teq %2, #0\n" - " bne 1b" - : "=&r" (lockval), "=&r" (newval), "=&r" (tmp) - : "r" (&lock->slock), "I" (1 << 16) - : "cc"); - - while (lockval.tickets.next != lockval.tickets.owner) { - wfe(); - lockval.tickets.owner = READ_ONCE(lock->tickets.owner); - } - smp_mb(); -} -#else -inline void fiq_fsm_spin_lock(fiq_lock_t *lock) { } -#endif - -/** - * fiq_fsm_spin_unlock() - ARMv6+ bare bones spinunlock - */ -#if defined(CONFIG_ARCH_BCM2835) && defined(CONFIG_SMP) -inline void fiq_fsm_spin_unlock(fiq_lock_t *lock) -{ - smp_mb(); - lock->tickets.owner++; - dsb_sev(); -} -#else -inline void fiq_fsm_spin_unlock(fiq_lock_t *lock) { } -#endif - -#endif - /** * fiq_fsm_restart_channel() - Poke channel enable bit for a split transaction * @channel: channel to re-enable diff --git a/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h b/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h index 8b080b7882fb23..266c4b5e6cf726 100644 --- a/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h +++ b/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h @@ -135,6 +135,7 @@ typedef spinlock_t fiq_lock_t; #else +#define TICKET_SHIFT 16 typedef struct { union { uint32_t slock; @@ -143,7 +144,70 @@ typedef struct { uint16_t next; } tickets; }; -} fiq_lock_t; +} __aligned(4) fiq_lock_t; + +#endif + +#ifdef CONFIG_ARM64 + +static inline void fiq_fsm_spin_lock(fiq_lock_t *lock) +{ + spin_lock((spinlock_t *)lock); +} + +static inline void fiq_fsm_spin_unlock(fiq_lock_t *lock) +{ + spin_unlock((spinlock_t *)lock); +} + +#else + +/** + * fiq_fsm_spin_lock() - ARMv6+ bare bones spinlock + * Must be called with local interrupts and FIQ disabled. + */ +#if defined(CONFIG_ARCH_BCM2835) && defined(CONFIG_SMP) +static inline void fiq_fsm_spin_lock(fiq_lock_t *lock) +{ + unsigned long tmp; + uint32_t newval; + fiq_lock_t lockval; + /* Nested locking, yay. If we are on the same CPU as the fiq, then the disable + * will be sufficient. If we are on a different CPU, then the lock protects us. */ + prefetchw(&lock->slock); + asm volatile ( + "1: ldrex %0, [%3]\n" + " add %1, %0, %4\n" + " strex %2, %1, [%3]\n" + " teq %2, #0\n" + " bne 1b" + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp) + : "r" (&lock->slock), "I" (1 << TICKET_SHIFT) + : "cc"); + + while (lockval.tickets.next != lockval.tickets.owner) { + wfe(); + lockval.tickets.owner = READ_ONCE(lock->tickets.owner); + } + smp_mb(); +} +#else +static inline void fiq_fsm_spin_lock(fiq_lock_t *lock) { } +#endif + +/** + * fiq_fsm_spin_unlock() - ARMv6+ bare bones spinunlock + */ +#if defined(CONFIG_ARCH_BCM2835) && defined(CONFIG_SMP) +static inline void fiq_fsm_spin_unlock(fiq_lock_t *lock) +{ + smp_mb(); + lock->tickets.owner++; + dsb_sev(); +} +#else +static inline void fiq_fsm_spin_unlock(fiq_lock_t *lock) { } +#endif #endif @@ -380,10 +444,6 @@ extern void local_fiq_disable(void); #endif -extern void fiq_fsm_spin_lock(fiq_lock_t *lock); - -extern void fiq_fsm_spin_unlock(fiq_lock_t *lock); - extern int fiq_fsm_too_late(struct fiq_state *st, int n); extern int fiq_fsm_tt_in_use(struct fiq_state *st, int num_channels, int n); From 07fa5e7a8d95fb661d51070541ac0c7cc4d6023d Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Thu, 12 Sep 2024 13:22:48 +0100 Subject: [PATCH 4/6] drivers: dwc_otg: add ticket-based spinlock for ARM64 The ARM64 architecture uses qspinlock which has a fast and slow path. This isn't ideal for all claimers of a lock operating in interrupt context. Add a ticket-based lock similar to the armv6/7 implementation. Based on an upstream patch that was abandoned in favour of qspinlock. Link: https://patchwork.kernel.org/project/linux-arm-kernel/patch/1381330468-32625-2-git-send-email-will.deacon@arm.com/ Signed-off-by: Jonathan Bell --- drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h | 45 ++++++++++++++++------ drivers/usb/host/dwc_otg/dwc_otg_hcd.c | 4 -- 2 files changed, 33 insertions(+), 16 deletions(-) diff --git a/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h b/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h index 266c4b5e6cf726..1399d579eb5898 100644 --- a/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h +++ b/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h @@ -129,12 +129,6 @@ enum fiq_debug_level { FIQDBG_PORTHUB = (1 << 3), }; -#ifdef CONFIG_ARM64 - -typedef spinlock_t fiq_lock_t; - -#else - #define TICKET_SHIFT 16 typedef struct { union { @@ -146,18 +140,45 @@ typedef struct { }; } __aligned(4) fiq_lock_t; -#endif - -#ifdef CONFIG_ARM64 - +#if defined(CONFIG_ARM64) static inline void fiq_fsm_spin_lock(fiq_lock_t *lock) { - spin_lock((spinlock_t *)lock); + unsigned int tmp; + fiq_lock_t lockval, newval; + + asm volatile( + /* Atomically increment the next ticket. */ + " prfm pstl1strm, %3\n" + "1: ldaxr %w0, %3\n" + " add %w1, %w0, %w5\n" + " stxr %w2, %w1, %3\n" + " cbnz %w2, 1b\n" + /* Did we get the lock? */ + " eor %w1, %w0, %w0, ror #16\n" + " cbz %w1, 3f\n" + /* + * No: spin on the owner. Send a local event to avoid missing an + * unlock before the exclusive load. + */ + " sevl\n" + "2: wfe\n" + " ldaxrh %w2, %4\n" + " eor %w1, %w2, %w0, lsr #16\n" + " cbnz %w1, 2b\n" + /* We got the lock. Critical section starts here. */ + "3:" + : "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock) + : "Q" (lock->tickets.owner), "I" (1 << TICKET_SHIFT) + : "memory"); } static inline void fiq_fsm_spin_unlock(fiq_lock_t *lock) { - spin_unlock((spinlock_t *)lock); + asm volatile( + " stlrh %w1, %0\n" + : "=Q" (lock->tickets.owner) + : "r" (lock->tickets.owner + 1) + : "memory"); } #else diff --git a/drivers/usb/host/dwc_otg/dwc_otg_hcd.c b/drivers/usb/host/dwc_otg/dwc_otg_hcd.c index fa35d944519c77..d22ab48004f17c 100644 --- a/drivers/usb/host/dwc_otg/dwc_otg_hcd.c +++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd.c @@ -1042,10 +1042,6 @@ int dwc_otg_hcd_init(dwc_otg_hcd_t * hcd, dwc_otg_core_if_t * core_if) } DWC_MEMSET(hcd->fiq_state, 0, (sizeof(struct fiq_state) + (sizeof(struct fiq_channel_state) * num_channels))); -#ifdef CONFIG_ARM64 - spin_lock_init(&hcd->fiq_state->lock); -#endif - hcd->fiq_state->dummy_send = DWC_DMA_ALLOC_ATOMIC(dev, 16, &hcd->fiq_state->dummy_send_dma); From 173bd5217267c29191b1163b13c758c54f29a664 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Thu, 12 Sep 2024 14:09:51 +0100 Subject: [PATCH 5/6] drivers: dwc_otg: reduce loglevel for probe messages Warning on normal behaviour isn't sensible and is spammy. Demote to info. Signed-off-by: Jonathan Bell --- drivers/usb/host/dwc_otg/dwc_otg_hcd.c | 2 +- drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/usb/host/dwc_otg/dwc_otg_hcd.c b/drivers/usb/host/dwc_otg/dwc_otg_hcd.c index d22ab48004f17c..a89ed6598c5a29 100644 --- a/drivers/usb/host/dwc_otg/dwc_otg_hcd.c +++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd.c @@ -1063,7 +1063,7 @@ int dwc_otg_hcd_init(dwc_otg_hcd_t * hcd, dwc_otg_core_if_t * core_if) * moderately readable array casts. */ hcd->fiq_dmab = DWC_DMA_ALLOC(dev, (sizeof(struct fiq_dma_channel) * num_channels), &hcd->fiq_state->dma_base); - DWC_WARN("FIQ DMA bounce buffers: virt = %px dma = %pad len=%zu", + DWC_INFO("FIQ DMA bounce buffers: virt = %px dma = %pad len=%zu", hcd->fiq_dmab, &hcd->fiq_state->dma_base, sizeof(struct fiq_dma_channel) * num_channels); diff --git a/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c b/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c index ca646860a09289..b3728c456fe7cd 100644 --- a/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c +++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c @@ -454,8 +454,8 @@ static void hcd_init_fiq(void *cookie) DWC_ERROR("Can't claim FIQ"); BUG(); } - DWC_WARN("FIQ on core %d", smp_processor_id()); - DWC_WARN("FIQ ASM at %px length %d", &_dwc_otg_fiq_stub, (int)(&_dwc_otg_fiq_stub_end - &_dwc_otg_fiq_stub)); + DWC_INFO("FIQ on core %d", smp_processor_id()); + DWC_INFO("FIQ ASM at %px length %d", &_dwc_otg_fiq_stub, (int)(&_dwc_otg_fiq_stub_end - &_dwc_otg_fiq_stub)); set_fiq_handler((void *) &_dwc_otg_fiq_stub, &_dwc_otg_fiq_stub_end - &_dwc_otg_fiq_stub); memset(®s,0,sizeof(regs)); @@ -482,7 +482,7 @@ static void hcd_init_fiq(void *cookie) otg_dev->os_dep.mphi_base + 0x1f0; dwc_otg_hcd->fiq_state->mphi_regs.swirq_clr = otg_dev->os_dep.mphi_base + 0x1f4; - DWC_WARN("Fake MPHI regs_base at %px", + DWC_INFO("Fake MPHI regs_base at %px", dwc_otg_hcd->fiq_state->mphi_regs.base); } else { dwc_otg_hcd->fiq_state->mphi_regs.ctrl = @@ -493,16 +493,16 @@ static void hcd_init_fiq(void *cookie) = otg_dev->os_dep.mphi_base + 0x2c; dwc_otg_hcd->fiq_state->mphi_regs.intstat = otg_dev->os_dep.mphi_base + 0x50; - DWC_WARN("MPHI regs_base at %px", + DWC_INFO("MPHI regs_base at %px", dwc_otg_hcd->fiq_state->mphi_regs.base); //Enable mphi peripheral writel((1<<31),dwc_otg_hcd->fiq_state->mphi_regs.ctrl); #ifdef DEBUG if (readl(dwc_otg_hcd->fiq_state->mphi_regs.ctrl) & 0x80000000) - DWC_WARN("MPHI periph has been enabled"); + DWC_INFO("MPHI periph has been enabled"); else - DWC_WARN("MPHI periph has NOT been enabled"); + DWC_INFO("MPHI periph has NOT been enabled"); #endif } // Enable FIQ interrupt from USB peripheral From 7d13394e196a5b056f883eff06a5d33a3a37b146 Mon Sep 17 00:00:00 2001 From: Jonathan Bell Date: Fri, 13 Sep 2024 10:20:29 +0100 Subject: [PATCH 6/6] drivers: dwc_otg: don't call disable_irq on the fake FIQ The local spinlock protects the handlers from racing against each other on separate cores, hard IRQs don't preempt each other, and disabling/enabling the interrupt is more expensive than letting the fake FIQ contend the spinlock. So turn local_fiq_en/disable into no-ops. Signed-off-by: Jonathan Bell --- drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c | 23 +++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c b/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c index b3728c456fe7cd..063bf0b17c98ac 100644 --- a/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c +++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd_linux.c @@ -406,19 +406,17 @@ static struct dwc_otg_hcd_function_ops hcd_fops = { #ifdef CONFIG_ARM64 -static int simfiq_irq = -1; - -void local_fiq_enable(void) -{ - if (simfiq_irq >= 0) - enable_irq(simfiq_irq); -} +/* + * With no FIQ support on AARCH64, the "FIQ handler" is demoted to a + * regular IRQ handler. With a nested spinlock preventing the two + * handlers from racing against each other, and a HCD lock preventing + * thread context from racing against the "bottom half" IRQ, there's no + * point manipulating global IRQ enable/disable state - so these two + * functions are no-ops. + */ +void local_fiq_enable(void) { } -void local_fiq_disable(void) -{ - if (simfiq_irq >= 0) - disable_irq(simfiq_irq); -} +void local_fiq_disable(void) { } irqreturn_t fiq_irq_handler(int irq, void *dev_id) { @@ -521,7 +519,6 @@ static void hcd_init_fiq(void *cookie) return; } - simfiq_irq = irq; #else #ifdef CONFIG_GENERIC_IRQ_MULTI_HANDLER irq = otg_dev->os_dep.fiq_num;