From f6d4a9316bf3eb1f3d6bda99d8e1763cfee63f08 Mon Sep 17 00:00:00 2001 From: Phil Howard Date: Tue, 6 Aug 2024 19:49:40 +0100 Subject: [PATCH 01/17] ports/rp2: PSRAM support. Add PSRAM support with auto detection. Performs a best-effort attempt to detect attached PSRAM, configure it and *add* it to the MicroPython heap. If PSRAM is not present, should fall back to use internal RAM. Introduce two new port/board defines: * MICROPY_HW_ENABLE_PSRAM to enable PSRAM. * MICROPY_HW_PSRAM_CS_PIN to define the chip-select pin. Changes: ports/rp2/rp2_psram.c/h: Add new PSRAM module. ports/rp2/main.c: Add optional PSRAM support. ports/rp2/CMakeLists.txt: Include rp2_psram.c. ports/rp2/rp2_flash.c: Add buffered write to avoid reads from PSRAM. ports/rp2/mpconfigport.h: Enable MICROPY_GC_SPLIT_HEAP for boards that set MICROPY_HW_ENABLE_PSRAM. Co-authored-by: Kirk Benell Co-authored-by: Mike Bell Signed-off-by: Phil Howard --- ports/rp2/CMakeLists.txt | 1 + ports/rp2/main.c | 8 ++ ports/rp2/mpconfigport.h | 3 + ports/rp2/rp2_flash.c | 48 +++++++++-- ports/rp2/rp2_psram.c | 180 +++++++++++++++++++++++++++++++++++++++ ports/rp2/rp2_psram.h | 11 +++ 6 files changed, 246 insertions(+), 5 deletions(-) create mode 100644 ports/rp2/rp2_psram.c create mode 100644 ports/rp2/rp2_psram.h diff --git a/ports/rp2/CMakeLists.txt b/ports/rp2/CMakeLists.txt index 4baaf7debf6c5..50b6f1fbc612c 100644 --- a/ports/rp2/CMakeLists.txt +++ b/ports/rp2/CMakeLists.txt @@ -165,6 +165,7 @@ set(MICROPY_SOURCE_PORT pendsv.c rp2_flash.c rp2_pio.c + rp2_psram.c rp2_dma.c uart.c usbd.c diff --git a/ports/rp2/main.c b/ports/rp2/main.c index d6bf448267152..2486d72ebb4bb 100644 --- a/ports/rp2/main.c +++ b/ports/rp2/main.c @@ -26,6 +26,7 @@ #include +#include "rp2_psram.h" #include "py/compile.h" #include "py/cstack.h" #include "py/runtime.h" @@ -120,7 +121,14 @@ int main(int argc, char **argv) { // Initialise stack extents and GC heap. 
mp_cstack_init_with_top(&__StackTop, &__StackTop - &__StackBottom); + gc_init(&__GcHeapStart, &__GcHeapEnd); + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM + size_t psram_size = psram_init(MICROPY_HW_PSRAM_CS_PIN); + if (psram_size) { + gc_add((void *)PSRAM_LOCATION, (void *)(PSRAM_LOCATION + psram_size)); + } + #endif #if MICROPY_PY_LWIP // lwIP doesn't allow to reinitialise itself by subsequent calls to this function diff --git a/ports/rp2/mpconfigport.h b/ports/rp2/mpconfigport.h index 9a11e6048c6b4..8d807053c3577 100644 --- a/ports/rp2/mpconfigport.h +++ b/ports/rp2/mpconfigport.h @@ -73,6 +73,9 @@ // Memory allocation policies #define MICROPY_GC_STACK_ENTRY_TYPE uint16_t +#ifdef MICROPY_HW_ENABLE_PSRAM +#define MICROPY_GC_SPLIT_HEAP (1) +#endif #define MICROPY_ALLOC_PATH_MAX (128) #define MICROPY_QSTR_BYTES_IN_HASH (1) diff --git a/ports/rp2/rp2_flash.c b/ports/rp2/rp2_flash.c index c1acb54e75748..722bf5c0b76c0 100644 --- a/ports/rp2/rp2_flash.c +++ b/ports/rp2/rp2_flash.c @@ -26,6 +26,7 @@ #include +#include "rp2_psram.h" #include "py/mphal.h" #include "py/runtime.h" #include "extmod/vfs.h" @@ -76,10 +77,21 @@ static uint32_t begin_critical_flash_section(void) { if (multicore_lockout_victim_is_initialized(1 - get_core_num())) { multicore_lockout_start_blocking(); } - return save_and_disable_interrupts(); + uint32_t state = save_and_disable_interrupts(); + + // We're about to invalidate the XIP cache, clean it first to commit any dirty writes to PSRAM + uint8_t *maintenance_ptr = (uint8_t *)XIP_MAINTENANCE_BASE; + for (int i = 1; i < 16 * 1024; i += 8) { + maintenance_ptr[i] = 0; + } + + return state; } static void end_critical_flash_section(uint32_t state) { + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM + psram_init(MICROPY_HW_PSRAM_CS_PIN); + #endif restore_interrupts(state); if (multicore_lockout_victim_is_initialized(1 - get_core_num())) { multicore_lockout_end_blocking(); @@ -145,11 +157,16 @@ static mp_obj_t 
rp2_flash_readblocks(size_t n_args, const mp_obj_t *args) { } static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(rp2_flash_readblocks_obj, 3, 4, rp2_flash_readblocks); +static inline size_t min_size(size_t a, size_t b) { + return a < b ? a : b; +} + static mp_obj_t rp2_flash_writeblocks(size_t n_args, const mp_obj_t *args) { rp2_flash_obj_t *self = MP_OBJ_TO_PTR(args[0]); uint32_t offset = mp_obj_get_int(args[1]) * BLOCK_SIZE_BYTES; mp_buffer_info_t bufinfo; mp_get_buffer_raise(args[2], &bufinfo, MP_BUFFER_READ); + if (n_args == 3) { mp_uint_t atomic_state = begin_critical_flash_section(); flash_range_erase(self->flash_base + offset, bufinfo.len); @@ -159,10 +176,31 @@ static mp_obj_t rp2_flash_writeblocks(size_t n_args, const mp_obj_t *args) { } else { offset += mp_obj_get_int(args[3]); } - mp_uint_t atomic_state = begin_critical_flash_section(); - flash_range_program(self->flash_base + offset, bufinfo.buf, bufinfo.len); - end_critical_flash_section(atomic_state); - mp_event_handle_nowait(); + + if ((uintptr_t)bufinfo.buf >= SRAM_BASE) { + mp_uint_t atomic_state = begin_critical_flash_section(); + flash_range_program(self->flash_base + offset, bufinfo.buf, bufinfo.len); + end_critical_flash_section(atomic_state); + mp_event_handle_nowait(); + } else { + size_t bytes_left = bufinfo.len; + size_t bytes_offset = 0; + static uint8_t copy_buffer[BLOCK_SIZE_BYTES] = {0}; + + while (bytes_left) { + memcpy(copy_buffer, bufinfo.buf + bytes_offset, min_size(bytes_left, BLOCK_SIZE_BYTES)); + mp_uint_t atomic_state = begin_critical_flash_section(); + flash_range_program(self->flash_base + offset + bytes_offset, copy_buffer, min_size(bytes_left, BLOCK_SIZE_BYTES)); + end_critical_flash_section(atomic_state); + bytes_offset += BLOCK_SIZE_BYTES; + if (bytes_left <= BLOCK_SIZE_BYTES) { + break; + } + bytes_left -= BLOCK_SIZE_BYTES; + mp_event_handle_nowait(); + } + } + // TODO check return value return mp_const_none; } diff --git a/ports/rp2/rp2_psram.c b/ports/rp2/rp2_psram.c new file 
mode 100644 index 0000000000000..07fd28c49ca8e --- /dev/null +++ b/ports/rp2/rp2_psram.c @@ -0,0 +1,180 @@ +#include "hardware/structs/ioqspi.h" +#include "hardware/structs/qmi.h" +#include "hardware/structs/xip_ctrl.h" +#include "hardware/sync.h" +#include "rp2_psram.h" + + +void __no_inline_not_in_flash_func(psram_set_qmi_timing)() { + // Make sure flash is deselected - QMI doesn't appear to have a busy flag(!) + while ((ioqspi_hw->io[1].status & IO_QSPI_GPIO_QSPI_SS_STATUS_OUTTOPAD_BITS) != IO_QSPI_GPIO_QSPI_SS_STATUS_OUTTOPAD_BITS) { + ; + } + + // For > 133 MHz + qmi_hw->m[0].timing = 0x40000202; + + // For <= 133 MHz + // qmi_hw->m[0].timing = 0x40000101; + + // Force a read through XIP to ensure the timing is applied + volatile uint32_t *ptr = (volatile uint32_t *)0x14000000; + (void)*ptr; +} + +size_t __no_inline_not_in_flash_func(psram_detect)() { + int psram_size = 0; + + uint32_t intr_stash = save_and_disable_interrupts(); + + // Try and read the PSRAM ID via direct_csr. + qmi_hw->direct_csr = 30 << QMI_DIRECT_CSR_CLKDIV_LSB | QMI_DIRECT_CSR_EN_BITS; + + // Need to poll for the cooldown on the last XIP transfer to expire + // (via direct-mode BUSY flag) before it is safe to perform the first + // direct-mode operation + while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) { + } + + // Exit out of QMI in case we've inited already + qmi_hw->direct_csr |= QMI_DIRECT_CSR_ASSERT_CS1N_BITS; + + // Transmit as quad. 
+ qmi_hw->direct_tx = QMI_DIRECT_TX_OE_BITS | QMI_DIRECT_TX_IWIDTH_VALUE_Q << QMI_DIRECT_TX_IWIDTH_LSB | 0xf5; + + while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) { + } + + (void)qmi_hw->direct_rx; + + qmi_hw->direct_csr &= ~(QMI_DIRECT_CSR_ASSERT_CS1N_BITS); + + // Read the id + qmi_hw->direct_csr |= QMI_DIRECT_CSR_ASSERT_CS1N_BITS; + uint8_t kgd = 0; + uint8_t eid = 0; + + for (size_t i = 0; i < 7; i++) + { + if (i == 0) { + qmi_hw->direct_tx = 0x9f; + } else { + qmi_hw->direct_tx = 0xff; + } + + while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_TXEMPTY_BITS) == 0) { + } + + while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) { + } + + if (i == 5) { + kgd = qmi_hw->direct_rx; + } else if (i == 6) { + eid = qmi_hw->direct_rx; + } else { + (void)qmi_hw->direct_rx; + } + } + + // Disable direct csr. + qmi_hw->direct_csr &= ~(QMI_DIRECT_CSR_ASSERT_CS1N_BITS | QMI_DIRECT_CSR_EN_BITS); + + if (kgd == 0x5D) { + psram_size = 1024 * 1024; // 1 MiB + uint8_t size_id = eid >> 5; + if (eid == 0x26 || size_id == 2) { + psram_size *= 8; // 8 MiB + } else if (size_id == 0) { + psram_size *= 2; // 2 MiB + } else if (size_id == 1) { + psram_size *= 4; // 4 MiB + } + } + + restore_interrupts(intr_stash); + return psram_size; +} + +size_t __no_inline_not_in_flash_func(psram_init)(uint cs_pin) { + gpio_set_function(cs_pin, GPIO_FUNC_XIP_CS1); + + size_t psram_size = psram_detect(); + + if (!psram_size) { + return 0; + } + + psram_set_qmi_timing(); + + // Enable direct mode, PSRAM CS, clkdiv of 10. 
+ qmi_hw->direct_csr = 10 << QMI_DIRECT_CSR_CLKDIV_LSB | \ + QMI_DIRECT_CSR_EN_BITS | \ + QMI_DIRECT_CSR_AUTO_CS1N_BITS; + while (qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) { + ; + } + + // Enable QPI mode on the PSRAM + const uint CMD_QPI_EN = 0x35; + qmi_hw->direct_tx = QMI_DIRECT_TX_NOPUSH_BITS | CMD_QPI_EN; + + while (qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) { + ; + } + + #if 0 + // Set PSRAM timing for APS6404: + // - Max select assumes a sys clock speed >= 240MHz + // - Min deselect assumes a sys clock speed <= 305MHz + // - Clkdiv of 2 is OK up to 266MHz. + qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB | + QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | + 30 << QMI_M1_TIMING_MAX_SELECT_LSB | + 5 << QMI_M1_TIMING_MIN_DESELECT_LSB | + 3 << QMI_M1_TIMING_RXDELAY_LSB | + 2 << QMI_M1_TIMING_CLKDIV_LSB; + #else + // Set PSRAM timing for APS6404: + // - Max select assumes a sys clock speed >= 120MHz + // - Min deselect assumes a sys clock speed <= 138MHz + // - Clkdiv of 1 is OK up to 133MHz. 
+ qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB | + QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | + 15 << QMI_M1_TIMING_MAX_SELECT_LSB | + 2 << QMI_M1_TIMING_MIN_DESELECT_LSB | + 2 << QMI_M1_TIMING_RXDELAY_LSB | + 1 << QMI_M1_TIMING_CLKDIV_LSB; + #endif + + // Set PSRAM commands and formats + qmi_hw->m[1].rfmt = + QMI_M0_RFMT_PREFIX_WIDTH_VALUE_Q << QMI_M0_RFMT_PREFIX_WIDTH_LSB | \ + QMI_M0_RFMT_ADDR_WIDTH_VALUE_Q << QMI_M0_RFMT_ADDR_WIDTH_LSB | \ + QMI_M0_RFMT_SUFFIX_WIDTH_VALUE_Q << QMI_M0_RFMT_SUFFIX_WIDTH_LSB | \ + QMI_M0_RFMT_DUMMY_WIDTH_VALUE_Q << QMI_M0_RFMT_DUMMY_WIDTH_LSB | \ + QMI_M0_RFMT_DATA_WIDTH_VALUE_Q << QMI_M0_RFMT_DATA_WIDTH_LSB | \ + QMI_M0_RFMT_PREFIX_LEN_VALUE_8 << QMI_M0_RFMT_PREFIX_LEN_LSB | \ + 6 << QMI_M0_RFMT_DUMMY_LEN_LSB; + + qmi_hw->m[1].rcmd = 0xEB; + + qmi_hw->m[1].wfmt = + QMI_M0_WFMT_PREFIX_WIDTH_VALUE_Q << QMI_M0_WFMT_PREFIX_WIDTH_LSB | \ + QMI_M0_WFMT_ADDR_WIDTH_VALUE_Q << QMI_M0_WFMT_ADDR_WIDTH_LSB | \ + QMI_M0_WFMT_SUFFIX_WIDTH_VALUE_Q << QMI_M0_WFMT_SUFFIX_WIDTH_LSB | \ + QMI_M0_WFMT_DUMMY_WIDTH_VALUE_Q << QMI_M0_WFMT_DUMMY_WIDTH_LSB | \ + QMI_M0_WFMT_DATA_WIDTH_VALUE_Q << QMI_M0_WFMT_DATA_WIDTH_LSB | \ + QMI_M0_WFMT_PREFIX_LEN_VALUE_8 << QMI_M0_WFMT_PREFIX_LEN_LSB; + + qmi_hw->m[1].wcmd = 0x38; + + // Disable direct mode + qmi_hw->direct_csr = 0; + + // Enable writes to PSRAM + hw_set_bits(&xip_ctrl_hw->ctrl, XIP_CTRL_WRITABLE_M1_BITS); + + // TODO: Detect PSRAM ID and size + return psram_size; +} diff --git a/ports/rp2/rp2_psram.h b/ports/rp2/rp2_psram.h new file mode 100644 index 0000000000000..cd791602cdd68 --- /dev/null +++ b/ports/rp2/rp2_psram.h @@ -0,0 +1,11 @@ +#include "pico/stdlib.h" + +#ifndef MICROPY_INCLUDED_RP2_MACHINE_PSRAM_H +#define MICROPY_INCLUDED_RP2_MACHINE_PSRAM_H + +#define PSRAM_LOCATION _u(0x11000000) + +extern void psram_set_qmi_timing(); +extern size_t psram_init(uint cs_pin); + +#endif From 57b3e8414272fddfc0670162bde1daad2f9a730d Mon Sep 17 00:00:00 2001 From: Phil 
Howard Date: Fri, 9 Aug 2024 10:16:55 +0100 Subject: [PATCH 02/17] ports/rp2: PSRAM: Fix RP2040/Pico build. Signed-off-by: Phil Howard --- ports/rp2/CMakeLists.txt | 7 ++++++- ports/rp2/rp2_flash.c | 2 ++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ports/rp2/CMakeLists.txt b/ports/rp2/CMakeLists.txt index 50b6f1fbc612c..b0569f84e610c 100644 --- a/ports/rp2/CMakeLists.txt +++ b/ports/rp2/CMakeLists.txt @@ -165,7 +165,6 @@ set(MICROPY_SOURCE_PORT pendsv.c rp2_flash.c rp2_pio.c - rp2_psram.c rp2_dma.c uart.c usbd.c @@ -174,6 +173,12 @@ set(MICROPY_SOURCE_PORT ${CMAKE_BINARY_DIR}/pins_${MICROPY_BOARD}.c ) +if(PICO_RP2350) + list(APPEND MICROPY_SOURCE_PORT + rp2_psram.c + ) +endif() + set(MICROPY_SOURCE_QSTR ${MICROPY_SOURCE_PY} ${MICROPY_DIR}/shared/readline/readline.c diff --git a/ports/rp2/rp2_flash.c b/ports/rp2/rp2_flash.c index 722bf5c0b76c0..4386986011db2 100644 --- a/ports/rp2/rp2_flash.c +++ b/ports/rp2/rp2_flash.c @@ -79,11 +79,13 @@ static uint32_t begin_critical_flash_section(void) { } uint32_t state = save_and_disable_interrupts(); + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM // We're about to invalidate the XIP cache, clean it first to commit any dirty writes to PSRAM uint8_t *maintenance_ptr = (uint8_t *)XIP_MAINTENANCE_BASE; for (int i = 1; i < 16 * 1024; i += 8) { maintenance_ptr[i] = 0; } + #endif return state; } From 39a2f36c2762c6f934ab7fa4a17bfbe9e7ba9363 Mon Sep 17 00:00:00 2001 From: Phil Howard Date: Fri, 9 Aug 2024 14:15:10 +0100 Subject: [PATCH 03/17] ports/rp2: Re-init PSRAM on CPU freq change. 
Signed-off-by: Phil Howard --- ports/rp2/modmachine.c | 4 +++ ports/rp2/rp2_psram.c | 60 ++++++++++++++++++++++-------------------- 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/ports/rp2/modmachine.c b/ports/rp2/modmachine.c index 2faf0bc6f8713..1fb6bc6df9d8e 100644 --- a/ports/rp2/modmachine.c +++ b/ports/rp2/modmachine.c @@ -31,6 +31,7 @@ #include "mp_usbd.h" #include "modmachine.h" #include "uart.h" +#include "rp2_psram.h" #include "clocks_extra.h" #include "hardware/pll.h" #include "hardware/structs/rosc.h" @@ -115,6 +116,9 @@ static void mp_machine_set_freq(size_t n_args, const mp_obj_t *args) { setup_default_uart(); mp_uart_init(); #endif + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM + psram_init(MICROPY_HW_PSRAM_CS_PIN); + #endif } static void mp_machine_idle(void) { diff --git a/ports/rp2/rp2_psram.c b/ports/rp2/rp2_psram.c index 07fd28c49ca8e..90c370c86a55e 100644 --- a/ports/rp2/rp2_psram.c +++ b/ports/rp2/rp2_psram.c @@ -1,6 +1,7 @@ #include "hardware/structs/ioqspi.h" #include "hardware/structs/qmi.h" #include "hardware/structs/xip_ctrl.h" +#include "hardware/clocks.h" #include "hardware/sync.h" #include "rp2_psram.h" @@ -11,11 +12,13 @@ void __no_inline_not_in_flash_func(psram_set_qmi_timing)() { ; } - // For > 133 MHz - qmi_hw->m[0].timing = 0x40000202; - - // For <= 133 MHz - // qmi_hw->m[0].timing = 0x40000101; + if (clock_get_hz(clk_sys) > 133000000) { + // For > 133 MHz + qmi_hw->m[0].timing = 0x40000202; + } else { + // For <= 133 MHz + qmi_hw->m[0].timing = 0x40000101; + } // Force a read through XIP to ensure the timing is applied volatile uint32_t *ptr = (volatile uint32_t *)0x14000000; @@ -123,29 +126,29 @@ size_t __no_inline_not_in_flash_func(psram_init)(uint cs_pin) { ; } - #if 0 - // Set PSRAM timing for APS6404: - // - Max select assumes a sys clock speed >= 240MHz - // - Min deselect assumes a sys clock speed <= 305MHz - // - Clkdiv of 2 is OK up to 266MHz. 
- qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB | - QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | - 30 << QMI_M1_TIMING_MAX_SELECT_LSB | - 5 << QMI_M1_TIMING_MIN_DESELECT_LSB | - 3 << QMI_M1_TIMING_RXDELAY_LSB | - 2 << QMI_M1_TIMING_CLKDIV_LSB; - #else - // Set PSRAM timing for APS6404: - // - Max select assumes a sys clock speed >= 120MHz - // - Min deselect assumes a sys clock speed <= 138MHz - // - Clkdiv of 1 is OK up to 133MHz. - qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB | - QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | - 15 << QMI_M1_TIMING_MAX_SELECT_LSB | - 2 << QMI_M1_TIMING_MIN_DESELECT_LSB | - 2 << QMI_M1_TIMING_RXDELAY_LSB | - 1 << QMI_M1_TIMING_CLKDIV_LSB; - #endif + if (clock_get_hz(clk_sys) >= 120000000) { + // Set PSRAM timing for APS6404: + // - Max select assumes a sys clock speed >= 120MHz + // - Min deselect assumes a sys clock speed <= 305MHz + // - Clkdiv of 2 is OK up to 266MHz. + qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB | + QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | + 15 << QMI_M1_TIMING_MAX_SELECT_LSB | + 5 << QMI_M1_TIMING_MIN_DESELECT_LSB | + 3 << QMI_M1_TIMING_RXDELAY_LSB | + 2 << QMI_M1_TIMING_CLKDIV_LSB; + } else { + // Set PSRAM timing for APS6404: + // - Max select assumes a sys clock speed >= 120MHz + // - Min deselect assumes a sys clock speed <= 138MHz + // - Clkdiv of 1 is OK up to 133MHz. 
+ qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB | + QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | + 15 << QMI_M1_TIMING_MAX_SELECT_LSB | + 2 << QMI_M1_TIMING_MIN_DESELECT_LSB | + 2 << QMI_M1_TIMING_RXDELAY_LSB | + 1 << QMI_M1_TIMING_CLKDIV_LSB; + } // Set PSRAM commands and formats qmi_hw->m[1].rfmt = @@ -175,6 +178,5 @@ size_t __no_inline_not_in_flash_func(psram_init)(uint cs_pin) { // Enable writes to PSRAM hw_set_bits(&xip_ctrl_hw->ctrl, XIP_CTRL_WRITABLE_M1_BITS); - // TODO: Detect PSRAM ID and size return psram_size; } From 3ea9dd0d107adcab252d0c9d2ac7d1529405ff92 Mon Sep 17 00:00:00 2001 From: Phil Howard Date: Fri, 9 Aug 2024 14:15:33 +0100 Subject: [PATCH 04/17] ports/rp2: Make split-heap optional. My tests found issues when PSRAM is combined with the existing RAM in a split-heap configuration. Since this option is not enabled by default on RP2 I have changed it to be optional. PSRAM will be used exclusively if MICROPY_GC_SPLIT_HEAP == 0, it will be added to RAM if MICROPY_GC_SPLIT_HEAP == 1, and the system will fall back to RAM only if it's not detected. Signed-off-by: Phil Howard --- ports/rp2/main.c | 10 +++++++++- ports/rp2/mpconfigport.h | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/ports/rp2/main.c b/ports/rp2/main.c index 2486d72ebb4bb..aba784c7118b7 100644 --- a/ports/rp2/main.c +++ b/ports/rp2/main.c @@ -122,12 +122,20 @@ int main(int argc, char **argv) { // Initialise stack extents and GC heap. 
mp_cstack_init_with_top(&__StackTop, &__StackTop - &__StackBottom); - gc_init(&__GcHeapStart, &__GcHeapEnd); #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM size_t psram_size = psram_init(MICROPY_HW_PSRAM_CS_PIN); if (psram_size) { + #if MICROPY_GC_SPLIT_HEAP + gc_init(&__GcHeapStart, &__GcHeapEnd); gc_add((void *)PSRAM_LOCATION, (void *)(PSRAM_LOCATION + psram_size)); + #else + gc_init((void *)PSRAM_LOCATION, (void *)(PSRAM_LOCATION + psram_size)); + #endif + } else { + gc_init(&__GcHeapStart, &__GcHeapEnd); } + #else + gc_init(&__GcHeapStart, &__GcHeapEnd); #endif #if MICROPY_PY_LWIP diff --git a/ports/rp2/mpconfigport.h b/ports/rp2/mpconfigport.h index 8d807053c3577..1a8330c563ea2 100644 --- a/ports/rp2/mpconfigport.h +++ b/ports/rp2/mpconfigport.h @@ -73,8 +73,8 @@ // Memory allocation policies #define MICROPY_GC_STACK_ENTRY_TYPE uint16_t -#ifdef MICROPY_HW_ENABLE_PSRAM -#define MICROPY_GC_SPLIT_HEAP (1) +#ifndef MICROPY_GC_SPLIT_HEAP +#define MICROPY_GC_SPLIT_HEAP (0) // whether PSRAM is added to or replaces the heap #endif #define MICROPY_ALLOC_PATH_MAX (128) #define MICROPY_QSTR_BYTES_IN_HASH (1) From 8f83844de439dd7b96004bb806470cc1384a9308 Mon Sep 17 00:00:00 2001 From: Mike Bell Date: Sun, 11 Aug 2024 17:34:04 +0100 Subject: [PATCH 05/17] ports/rp2: Compute QMI timing based on system clock. Signed-off-by: Mike Bell --- ports/rp2/rp2_psram.c | 65 ++++++++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 29 deletions(-) diff --git a/ports/rp2/rp2_psram.c b/ports/rp2/rp2_psram.c index 90c370c86a55e..e3da848d09265 100644 --- a/ports/rp2/rp2_psram.c +++ b/ports/rp2/rp2_psram.c @@ -12,13 +12,15 @@ void __no_inline_not_in_flash_func(psram_set_qmi_timing)() { ; } - if (clock_get_hz(clk_sys) > 133000000) { - // For > 133 MHz - qmi_hw->m[0].timing = 0x40000202; - } else { - // For <= 133 MHz - qmi_hw->m[0].timing = 0x40000101; - } + // Use the minimum divisor assuming a 133MHz flash. 
+ // RX delay equal to the divisor means sampling at the same time as the next falling edge of SCK after the + // falling edge that generated the data. This is pretty tight at 133MHz but seems to work with the Winbond flash chips. + const int max_flash_freq = 133000000; + const int divisor = (clock_get_hz(clk_sys) + max_flash_freq - 1) / max_flash_freq; + const int rxdelay = divisor; + qmi_hw->m[0].timing = (1 << QMI_M0_TIMING_COOLDOWN_LSB) | + rxdelay << QMI_M1_TIMING_RXDELAY_LSB | + divisor << QMI_M1_TIMING_CLKDIV_LSB; // Force a read through XIP to ensure the timing is applied volatile uint32_t *ptr = (volatile uint32_t *)0x14000000; @@ -126,30 +128,35 @@ size_t __no_inline_not_in_flash_func(psram_init)(uint cs_pin) { ; } - if (clock_get_hz(clk_sys) >= 120000000) { - // Set PSRAM timing for APS6404: - // - Max select assumes a sys clock speed >= 120MHz - // - Min deselect assumes a sys clock speed <= 305MHz - // - Clkdiv of 2 is OK up to 266MHz. - qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB | - QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | - 15 << QMI_M1_TIMING_MAX_SELECT_LSB | - 5 << QMI_M1_TIMING_MIN_DESELECT_LSB | - 3 << QMI_M1_TIMING_RXDELAY_LSB | - 2 << QMI_M1_TIMING_CLKDIV_LSB; - } else { - // Set PSRAM timing for APS6404: - // - Max select assumes a sys clock speed >= 120MHz - // - Min deselect assumes a sys clock speed <= 138MHz - // - Clkdiv of 1 is OK up to 133MHz. - qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB | - QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | - 15 << QMI_M1_TIMING_MAX_SELECT_LSB | - 2 << QMI_M1_TIMING_MIN_DESELECT_LSB | - 2 << QMI_M1_TIMING_RXDELAY_LSB | - 1 << QMI_M1_TIMING_CLKDIV_LSB; + // Set PSRAM timing for APS6404 + // + // Using an rxdelay equal to the divisor isn't enough when running the APS6404 close to 133MHz. 
+ // So: don't allow running at divisor 1 above 100MHz (because delay of 2 would be too late), + // and add an extra 1 to the rxdelay if the divided clock is > 100MHz (i.e. sys clock > 200MHz). + const int max_psram_freq = 133000000; + const int clock_hz = clock_get_hz(clk_sys); + int divisor = (clock_hz + max_psram_freq - 1) / max_psram_freq; + if (divisor == 1 && clock_hz > 100000000) { + divisor = 2; + } + int rxdelay = divisor; + if (clock_hz / divisor > 100000000) { + rxdelay += 1; } + // - Max select must be <= 8us. The value is given in multiples of 64 system clocks. + // - Min deselect must be >= 18ns. The value is given in system clock cycles - ceil(divisor / 2). + const int clock_period_fs = 1000000000000000ll / clock_hz; + const int max_select = (125 * 1000000) / clock_period_fs; // 125 = 8000ns / 64 + const int min_deselect = (18 * 1000000 + (clock_period_fs - 1)) / clock_period_fs - (divisor + 1) / 2; + + qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB | + QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | + max_select << QMI_M1_TIMING_MAX_SELECT_LSB | + min_deselect << QMI_M1_TIMING_MIN_DESELECT_LSB | + rxdelay << QMI_M1_TIMING_RXDELAY_LSB | + divisor << QMI_M1_TIMING_CLKDIV_LSB; + // Set PSRAM commands and formats qmi_hw->m[1].rfmt = QMI_M0_RFMT_PREFIX_WIDTH_VALUE_Q << QMI_M0_RFMT_PREFIX_WIDTH_LSB | \ From 18e7d019a9f40782857fbaa7219886d12fb4976b Mon Sep 17 00:00:00 2001 From: Mike Bell Date: Sun, 11 Aug 2024 17:45:15 +0100 Subject: [PATCH 06/17] ports/rp2: Fix garbage collection with large heap. 
Signed-off-by: Mike Bell --- ports/rp2/mpconfigport.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ports/rp2/mpconfigport.h b/ports/rp2/mpconfigport.h index 1a8330c563ea2..8d0a917464be5 100644 --- a/ports/rp2/mpconfigport.h +++ b/ports/rp2/mpconfigport.h @@ -71,8 +71,16 @@ #define MICROPY_CONFIG_ROM_LEVEL (MICROPY_CONFIG_ROM_LEVEL_EXTRA_FEATURES) #endif +#ifndef MICROPY_HW_ENABLE_PSRAM +#define MICROPY_HW_ENABLE_PSRAM (0) +#endif + // Memory allocation policies +#if MICROPY_HW_ENABLE_PSRAM +#define MICROPY_GC_STACK_ENTRY_TYPE uint32_t +#else #define MICROPY_GC_STACK_ENTRY_TYPE uint16_t +#endif #ifndef MICROPY_GC_SPLIT_HEAP #define MICROPY_GC_SPLIT_HEAP (0) // whether PSRAM is added to or replaces the heap #endif From 96270b2064b69dd2d3b1f73cffab7dfc5a4df8b8 Mon Sep 17 00:00:00 2001 From: Mike Bell Date: Sun, 11 Aug 2024 20:52:34 +0100 Subject: [PATCH 07/17] ports/rp2: Remove flash timing control from PSRAM. Signed-off-by: Mike Bell --- ports/rp2/rp2_psram.c | 23 ----------------------- ports/rp2/rp2_psram.h | 1 - 2 files changed, 24 deletions(-) diff --git a/ports/rp2/rp2_psram.c b/ports/rp2/rp2_psram.c index e3da848d09265..30f87adaf4122 100644 --- a/ports/rp2/rp2_psram.c +++ b/ports/rp2/rp2_psram.c @@ -6,27 +6,6 @@ #include "rp2_psram.h" -void __no_inline_not_in_flash_func(psram_set_qmi_timing)() { - // Make sure flash is deselected - QMI doesn't appear to have a busy flag(!) - while ((ioqspi_hw->io[1].status & IO_QSPI_GPIO_QSPI_SS_STATUS_OUTTOPAD_BITS) != IO_QSPI_GPIO_QSPI_SS_STATUS_OUTTOPAD_BITS) { - ; - } - - // Use the minimum divisor assuming a 133MHz flash. - // RX delay equal to the divisor means sampling at the same time as the next falling edge of SCK after the - // falling edge that generated the data. This is pretty tight at 133MHz but seems to work with the Winbond flash chips. 
- const int max_flash_freq = 133000000; - const int divisor = (clock_get_hz(clk_sys) + max_flash_freq - 1) / max_flash_freq; - const int rxdelay = divisor; - qmi_hw->m[0].timing = (1 << QMI_M0_TIMING_COOLDOWN_LSB) | - rxdelay << QMI_M1_TIMING_RXDELAY_LSB | - divisor << QMI_M1_TIMING_CLKDIV_LSB; - - // Force a read through XIP to ensure the timing is applied - volatile uint32_t *ptr = (volatile uint32_t *)0x14000000; - (void)*ptr; -} - size_t __no_inline_not_in_flash_func(psram_detect)() { int psram_size = 0; @@ -110,8 +89,6 @@ size_t __no_inline_not_in_flash_func(psram_init)(uint cs_pin) { return 0; } - psram_set_qmi_timing(); - // Enable direct mode, PSRAM CS, clkdiv of 10. qmi_hw->direct_csr = 10 << QMI_DIRECT_CSR_CLKDIV_LSB | \ QMI_DIRECT_CSR_EN_BITS | \ diff --git a/ports/rp2/rp2_psram.h b/ports/rp2/rp2_psram.h index cd791602cdd68..718e7a1ea9e76 100644 --- a/ports/rp2/rp2_psram.h +++ b/ports/rp2/rp2_psram.h @@ -5,7 +5,6 @@ #define PSRAM_LOCATION _u(0x11000000) -extern void psram_set_qmi_timing(); extern size_t psram_init(uint cs_pin); #endif From 96da587c1e4317a4fd4d7d266300ec1df4ed8aa6 Mon Sep 17 00:00:00 2001 From: Mike Bell Date: Sun, 11 Aug 2024 21:39:05 +0100 Subject: [PATCH 08/17] ports/rp2: Set flash divisor appropriately. Signed-off-by: Mike Bell --- ports/rp2/main.c | 4 +++ ports/rp2/modmachine.c | 12 ++++++++ ports/rp2/rp2_flash.c | 69 ++++++++++++++++++++++++++++++++++++++++++ ports/rp2/rp2_flash.h | 7 +++++ 4 files changed, 92 insertions(+) create mode 100644 ports/rp2/rp2_flash.h diff --git a/ports/rp2/main.c b/ports/rp2/main.c index aba784c7118b7..5ed3519a795d9 100644 --- a/ports/rp2/main.c +++ b/ports/rp2/main.c @@ -27,6 +27,7 @@ #include #include "rp2_psram.h" +#include "rp2_flash.h" #include "py/compile.h" #include "py/cstack.h" #include "py/runtime.h" @@ -94,6 +95,9 @@ int main(int argc, char **argv) { // Hook for setting up anything that needs to be super early in the bootup process. 
MICROPY_BOARD_STARTUP(); + // Set the flash divisor to an appropriate value + rp2_flash_set_timing(); + #if MICROPY_HW_ENABLE_UART_REPL bi_decl(bi_program_feature("UART REPL")) setup_default_uart(); diff --git a/ports/rp2/modmachine.c b/ports/rp2/modmachine.c index 1fb6bc6df9d8e..7e9881b6c5206 100644 --- a/ports/rp2/modmachine.c +++ b/ports/rp2/modmachine.c @@ -32,6 +32,7 @@ #include "modmachine.h" #include "uart.h" #include "rp2_psram.h" +#include "rp2_flash.h" #include "clocks_extra.h" #include "hardware/pll.h" #include "hardware/structs/rosc.h" @@ -95,6 +96,11 @@ static mp_obj_t mp_machine_get_freq(void) { static void mp_machine_set_freq(size_t n_args, const mp_obj_t *args) { mp_int_t freq = mp_obj_get_int(args[0]); + + // If necessary, increase the flash divider before increasing the clock speed + const int old_freq = clock_get_hz(clk_sys); + rp2_flash_set_timing_for_freq(MAX(freq, old_freq)); + if (!set_sys_clock_khz(freq / 1000, false)) { mp_raise_ValueError(MP_ERROR_TEXT("cannot change frequency")); } @@ -112,6 +118,12 @@ static void mp_machine_set_freq(size_t n_args, const mp_obj_t *args) { } } } + + // If clock speed was reduced, maybe we can reduce the flash divider + if (freq < old_freq) { + rp2_flash_set_timing_for_freq(freq); + } + #if MICROPY_HW_ENABLE_UART_REPL setup_default_uart(); mp_uart_init(); diff --git a/ports/rp2/rp2_flash.c b/ports/rp2/rp2_flash.c index 4386986011db2..45f2ec70e1017 100644 --- a/ports/rp2/rp2_flash.c +++ b/ports/rp2/rp2_flash.c @@ -33,6 +33,12 @@ #include "modrp2.h" #include "hardware/flash.h" #include "pico/binary_info.h" +#ifdef PICO_RP2350 +#include "hardware/structs/ioqspi.h" +#include "hardware/structs/qmi.h" +#else +#include "hardware/structs/ssi.h" +#endif #define BLOCK_SIZE_BYTES (FLASH_SECTOR_SIZE) @@ -71,6 +77,48 @@ bi_decl(bi_block_device( BINARY_INFO_BLOCK_DEV_FLAG_WRITE | BINARY_INFO_BLOCK_DEV_FLAG_PT_UNKNOWN)); +// Function to set the flash divisor to the correct divisor, assumes interrupts disabled +// and 
core1 locked out if relevant. +static void __no_inline_not_in_flash_func(rp2_flash_set_timing_internal)(int clock_hz) { + + // Use the minimum divisor assuming a 133MHz flash. + const int max_flash_freq = 133000000; + int divisor = (clock_hz + max_flash_freq - 1) / max_flash_freq; + + #if PICO_RP2350 + // Make sure flash is deselected - QMI doesn't appear to have a busy flag(!) + while ((ioqspi_hw->io[1].status & IO_QSPI_GPIO_QSPI_SS_STATUS_OUTTOPAD_BITS) != IO_QSPI_GPIO_QSPI_SS_STATUS_OUTTOPAD_BITS) { + ; + } + + // RX delay equal to the divisor means sampling at the same time as the next falling edge of SCK after the + // falling edge that generated the data. This is pretty tight at 133MHz but seems to work with the Winbond flash chips. + const int rxdelay = divisor; + qmi_hw->m[0].timing = (1 << QMI_M0_TIMING_COOLDOWN_LSB) | + rxdelay << QMI_M1_TIMING_RXDELAY_LSB | + divisor << QMI_M1_TIMING_CLKDIV_LSB; + + // Force a read through XIP to ensure the timing is applied + volatile uint32_t *ptr = (volatile uint32_t *)0x14000000; + (void)*ptr; + #else + // RP2040 SSI hardware only supports even divisors + if (divisor & 1) { + divisor += 1; + } + + // Wait for SSI not busy + while (ssi_hw->sr & SSI_SR_BUSY_BITS) { + ; + } + + // Disable, set the new divisor, and re-enable + hw_clear_bits(&ssi_hw->ssienr, SSI_SSIENR_SSI_EN_BITS); + ssi_hw->baudr = divisor; + hw_set_bits(&ssi_hw->ssienr, SSI_SSIENR_SSI_EN_BITS); + #endif +} + // Flash erase and write must run with interrupts disabled and the other core suspended, // because the XIP bit gets disabled. 
static uint32_t begin_critical_flash_section(void) { @@ -94,6 +142,7 @@ static void end_critical_flash_section(uint32_t state) { #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM psram_init(MICROPY_HW_PSRAM_CS_PIN); #endif + rp2_flash_set_timing_internal(clock_get_hz(clk_sys)); restore_interrupts(state); if (multicore_lockout_victim_is_initialized(1 - get_core_num())) { multicore_lockout_end_blocking(); @@ -250,3 +299,23 @@ MP_DEFINE_CONST_OBJ_TYPE( make_new, rp2_flash_make_new, locals_dict, &rp2_flash_locals_dict ); + +// Modify the flash timing. Ensure flash access is suspended while +// the timings are altered. +void rp2_flash_set_timing_for_freq(int clock_hz) { + if (multicore_lockout_victim_is_initialized(1 - get_core_num())) { + multicore_lockout_start_blocking(); + } + uint32_t state = save_and_disable_interrupts(); + + rp2_flash_set_timing_internal(clock_hz); + + restore_interrupts(state); + if (multicore_lockout_victim_is_initialized(1 - get_core_num())) { + multicore_lockout_end_blocking(); + } +} + +void rp2_flash_set_timing() { + rp2_flash_set_timing_for_freq(clock_get_hz(clk_sys)); +} diff --git a/ports/rp2/rp2_flash.h b/ports/rp2/rp2_flash.h new file mode 100644 index 0000000000000..d5cf3ba2acac0 --- /dev/null +++ b/ports/rp2/rp2_flash.h @@ -0,0 +1,7 @@ +#ifndef MICROPY_INCLUDED_RP2_MACHINE_FLASH_H +#define MICROPY_INCLUDED_RP2_MACHINE_FLASH_H + +extern void rp2_flash_set_timing_for_freq(int clock_hz); +extern void rp2_flash_set_timing(); + +#endif From e328d4cbdce0e2102b655cd14f9bfddcc16f4e9b Mon Sep 17 00:00:00 2001 From: Mike Bell Date: Thu, 15 Aug 2024 14:01:57 +0100 Subject: [PATCH 09/17] ports/rp2: Reset flash timing before PSRAM timing. 
Signed-off-by: Mike Bell --- ports/rp2/rp2_flash.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ports/rp2/rp2_flash.c b/ports/rp2/rp2_flash.c index 45f2ec70e1017..eae219f49ea9e 100644 --- a/ports/rp2/rp2_flash.c +++ b/ports/rp2/rp2_flash.c @@ -139,10 +139,10 @@ static uint32_t begin_critical_flash_section(void) { } static void end_critical_flash_section(uint32_t state) { + rp2_flash_set_timing_internal(clock_get_hz(clk_sys)); #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM psram_init(MICROPY_HW_PSRAM_CS_PIN); #endif - rp2_flash_set_timing_internal(clock_get_hz(clk_sys)); restore_interrupts(state); if (multicore_lockout_victim_is_initialized(1 - get_core_num())) { multicore_lockout_end_blocking(); From 1585553c49b7a56e126570b2504e086414e8aa95 Mon Sep 17 00:00:00 2001 From: Phil Howard Date: Mon, 30 Sep 2024 16:57:19 +0100 Subject: [PATCH 10/17] ports/rp2: Move PSRAM init earlier in startup. Try to avoid causing an upset with USB startup by detecting PSRAM as early as possible. 
Signed-off-by: Phil Howard --- ports/rp2/main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ports/rp2/main.c b/ports/rp2/main.c index 5ed3519a795d9..7c4dda42b73dc 100644 --- a/ports/rp2/main.c +++ b/ports/rp2/main.c @@ -86,6 +86,10 @@ int main(int argc, char **argv) { SCB->SCR |= SCB_SCR_SEVONPEND_Msk; #endif + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM + size_t psram_size = psram_init(MICROPY_HW_PSRAM_CS_PIN); + #endif + pendsv_init(); soft_timer_init(); @@ -127,7 +131,6 @@ int main(int argc, char **argv) { mp_cstack_init_with_top(&__StackTop, &__StackTop - &__StackBottom); #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM - size_t psram_size = psram_init(MICROPY_HW_PSRAM_CS_PIN); if (psram_size) { #if MICROPY_GC_SPLIT_HEAP gc_init(&__GcHeapStart, &__GcHeapEnd); From 7d3b7d607691af5cdb4d67993769403dc2138f66 Mon Sep 17 00:00:00 2001 From: Mike Bell Date: Wed, 23 Oct 2024 21:43:55 +0100 Subject: [PATCH 11/17] ports/rp2: Invalidate cache after clean to prevent hangs. Signed-off-by: Mike Bell --- ports/rp2/rp2_flash.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ports/rp2/rp2_flash.c b/ports/rp2/rp2_flash.c index eae219f49ea9e..e48c5c822c13b 100644 --- a/ports/rp2/rp2_flash.c +++ b/ports/rp2/rp2_flash.c @@ -132,6 +132,10 @@ static uint32_t begin_critical_flash_section(void) { uint8_t *maintenance_ptr = (uint8_t *)XIP_MAINTENANCE_BASE; for (int i = 1; i < 16 * 1024; i += 8) { maintenance_ptr[i] = 0; + + // Must also invalidate the cache lines to prevent rare hangs + // See: https://forums.raspberrypi.com/viewtopic.php?t=378249) + maintenance_ptr[i - 1] = 0; } #endif From b8227a3f7f40160c683deca8f13f690325680976 Mon Sep 17 00:00:00 2001 From: Mike Bell Date: Mon, 28 Oct 2024 18:38:45 +0000 Subject: [PATCH 12/17] ports/rp2: Better fix to cache cleaning. 
Signed-off-by: Mike Bell --- ports/rp2/rp2_flash.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ports/rp2/rp2_flash.c b/ports/rp2/rp2_flash.c index e48c5c822c13b..f464c048b76e7 100644 --- a/ports/rp2/rp2_flash.c +++ b/ports/rp2/rp2_flash.c @@ -129,13 +129,12 @@ static uint32_t begin_critical_flash_section(void) { #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM // We're about to invalidate the XIP cache, clean it first to commit any dirty writes to PSRAM - uint8_t *maintenance_ptr = (uint8_t *)XIP_MAINTENANCE_BASE; + // Use the upper 16k of the maintenance space (0x1bffc000 through 0x1bffffff) to workaround + // incorrect behaviour of the XIP clean operation, where it also alters the tag of the associated + // cache line: https://forums.raspberrypi.com/viewtopic.php?t=378249#p2263677 + volatile uint8_t *maintenance_ptr = (volatile uint8_t *)(XIP_SRAM_BASE + (XIP_MAINTENANCE_BASE - XIP_BASE)); for (int i = 1; i < 16 * 1024; i += 8) { maintenance_ptr[i] = 0; - - // Must also invalidate the cache lines to prevent rare hangs - // See: https://forums.raspberrypi.com/viewtopic.php?t=378249) - maintenance_ptr[i - 1] = 0; } #endif From 2024d5b32559dc8a56425a0176dc887e1d4d3445 Mon Sep 17 00:00:00 2001 From: Mike Bell Date: Sun, 13 Oct 2024 15:57:11 +0100 Subject: [PATCH 13/17] ports/rp2: Further PSRAM setup tweaks. Move PSRAM setup to immediately after flash timing setup. Disable interrupts while setting up PSRAM. 
Signed-off-by: Mike Bell --- ports/rp2/main.c | 8 ++++---- ports/rp2/rp2_psram.c | 7 ++++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ports/rp2/main.c b/ports/rp2/main.c index 7c4dda42b73dc..48a3cc0f1b11a 100644 --- a/ports/rp2/main.c +++ b/ports/rp2/main.c @@ -86,10 +86,6 @@ int main(int argc, char **argv) { SCB->SCR |= SCB_SCR_SEVONPEND_Msk; #endif - #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM - size_t psram_size = psram_init(MICROPY_HW_PSRAM_CS_PIN); - #endif - pendsv_init(); soft_timer_init(); @@ -102,6 +98,10 @@ int main(int argc, char **argv) { // Set the flash divisor to an appropriate value rp2_flash_set_timing(); + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM + size_t psram_size = psram_init(MICROPY_HW_PSRAM_CS_PIN); + #endif + #if MICROPY_HW_ENABLE_UART_REPL bi_decl(bi_program_feature("UART REPL")) setup_default_uart(); diff --git a/ports/rp2/rp2_psram.c b/ports/rp2/rp2_psram.c index 30f87adaf4122..ae6a3fb0a562f 100644 --- a/ports/rp2/rp2_psram.c +++ b/ports/rp2/rp2_psram.c @@ -9,8 +9,6 @@ size_t __no_inline_not_in_flash_func(psram_detect)() { int psram_size = 0; - uint32_t intr_stash = save_and_disable_interrupts(); - // Try and read the PSRAM ID via direct_csr. 
qmi_hw->direct_csr = 30 << QMI_DIRECT_CSR_CLKDIV_LSB | QMI_DIRECT_CSR_EN_BITS; @@ -76,13 +74,14 @@ size_t __no_inline_not_in_flash_func(psram_detect)() { } } - restore_interrupts(intr_stash); return psram_size; } size_t __no_inline_not_in_flash_func(psram_init)(uint cs_pin) { gpio_set_function(cs_pin, GPIO_FUNC_XIP_CS1); + uint32_t intr_stash = save_and_disable_interrupts(); + size_t psram_size = psram_detect(); if (!psram_size) { @@ -162,5 +161,7 @@ size_t __no_inline_not_in_flash_func(psram_init)(uint cs_pin) { // Enable writes to PSRAM hw_set_bits(&xip_ctrl_hw->ctrl, XIP_CTRL_WRITABLE_M1_BITS); + restore_interrupts(intr_stash); + return psram_size; } From be7e81c3fbc90766fee148caefc2e8d6ab0ab5c1 Mon Sep 17 00:00:00 2001 From: Phil Howard Date: Thu, 5 Dec 2024 10:04:13 +0000 Subject: [PATCH 14/17] ports/rp2: Enable split heap by default. Enable split heap if PSRAM is enabled. This allows both the internal SRAM and PSRAM to be used as heap. Signed-off-by: Phil Howard --- ports/rp2/mpconfigport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ports/rp2/mpconfigport.h b/ports/rp2/mpconfigport.h index 8d0a917464be5..d12cdb54041a4 100644 --- a/ports/rp2/mpconfigport.h +++ b/ports/rp2/mpconfigport.h @@ -82,7 +82,7 @@ #define MICROPY_GC_STACK_ENTRY_TYPE uint16_t #endif #ifndef MICROPY_GC_SPLIT_HEAP -#define MICROPY_GC_SPLIT_HEAP (0) // whether PSRAM is added to or replaces the heap +#define MICROPY_GC_SPLIT_HEAP MICROPY_HW_ENABLE_PSRAM // whether PSRAM is added to or replaces the heap #endif #define MICROPY_ALLOC_PATH_MAX (128) #define MICROPY_QSTR_BYTES_IN_HASH (1) From 3a7019c9d737d88580e5c68aca29e7b1500d705b Mon Sep 17 00:00:00 2001 From: Phil Howard Date: Thu, 5 Dec 2024 10:09:54 +0000 Subject: [PATCH 15/17] ports/rp2: Raise GC stack size for PSRAM. GC stack was overflowing and causing the GC to scan through the entire memory pool, which is a particularly pathological case with 8MB PSRAM. This caused noticeable slowdowns during GC.
This change takes the stack from 256 to 4096 bytes to avoid overflow. Co-authored-by: Kirk Benell Signed-off-by: Phil Howard --- ports/rp2/mpconfigport.h | 1 + 1 file changed, 1 insertion(+) diff --git a/ports/rp2/mpconfigport.h b/ports/rp2/mpconfigport.h index d12cdb54041a4..1a4fa6fb5c7f0 100644 --- a/ports/rp2/mpconfigport.h +++ b/ports/rp2/mpconfigport.h @@ -78,6 +78,7 @@ // Memory allocation policies #if MICROPY_HW_ENABLE_PSRAM #define MICROPY_GC_STACK_ENTRY_TYPE uint32_t +#define MICROPY_ALLOC_GC_STACK_SIZE (1024) // Avoid slowdown when GC stack overflow causes a full sweep of PSRAM-backed heap #else #define MICROPY_GC_STACK_ENTRY_TYPE uint16_t #endif From 912ae3fb0af81e9dd0f8a4ffcf18f6d203f39061 Mon Sep 17 00:00:00 2001 From: Mike Bell Date: Sun, 17 Nov 2024 14:59:02 +0000 Subject: [PATCH 16/17] ports/rp2: PSRAM linker script. Signed-off-by: Mike Bell --- ports/rp2/CMakeLists.txt | 11 +- ports/rp2/main.c | 10 +- ports/rp2/memmap_mp_rp2350_psram.ld | 322 ++++++++++++++++++++++++++++ ports/rp2/rp2_psram.h | 2 - 4 files changed, 339 insertions(+), 6 deletions(-) create mode 100644 ports/rp2/memmap_mp_rp2350_psram.ld diff --git a/ports/rp2/CMakeLists.txt b/ports/rp2/CMakeLists.txt index b0569f84e610c..b2ab98ddb2430 100644 --- a/ports/rp2/CMakeLists.txt +++ b/ports/rp2/CMakeLists.txt @@ -173,10 +173,13 @@ set(MICROPY_SOURCE_PORT ${CMAKE_BINARY_DIR}/pins_${MICROPY_BOARD}.c ) -if(PICO_RP2350) +if(MICROPY_HW_ENABLE_PSRAM) list(APPEND MICROPY_SOURCE_PORT rp2_psram.c ) + target_compile_definitions(${MICROPY_TARGET} PRIVATE + MICROPY_HW_ENABLE_PSRAM=1 + ) endif() set(MICROPY_SOURCE_QSTR @@ -597,7 +600,11 @@ if (PICO_ON_DEVICE AND NOT PICO_NO_FLASH AND NOT PICO_COPY_TO_RAM) if(PICO_RP2040) pico_set_linker_script(${MICROPY_TARGET} ${CMAKE_CURRENT_LIST_DIR}/memmap_mp_rp2040.ld) elseif(PICO_RP2350) - pico_set_linker_script(${MICROPY_TARGET} ${CMAKE_CURRENT_LIST_DIR}/memmap_mp_rp2350.ld) + if (MICROPY_HW_ENABLE_PSRAM) + pico_set_linker_script(${MICROPY_TARGET} 
${CMAKE_CURRENT_LIST_DIR}/memmap_mp_rp2350_psram.ld) + else() + pico_set_linker_script(${MICROPY_TARGET} ${CMAKE_CURRENT_LIST_DIR}/memmap_mp_rp2350.ld) + endif() endif() endif() diff --git a/ports/rp2/main.c b/ports/rp2/main.c index 48a3cc0f1b11a..b9a40667dd236 100644 --- a/ports/rp2/main.c +++ b/ports/rp2/main.c @@ -70,6 +70,7 @@ extern uint8_t __StackTop, __StackBottom; extern uint8_t __GcHeapStart, __GcHeapEnd; +extern uint8_t __PsramGcHeapStart, __PsramGcHeapEnd; // Embed version info in the binary in machine readable form bi_decl(bi_program_version_string(MICROPY_GIT_TAG)); @@ -132,11 +133,16 @@ int main(int argc, char **argv) { #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM if (psram_size) { + // Linker script assumes a 2MB PSRAM, increase the size accordingly. + size_t psram_additional_size = 0; + if (psram_size > 2 * 1024 * 1024) { + psram_additional_size = psram_size - 2 * 1024 * 1024; + } #if MICROPY_GC_SPLIT_HEAP gc_init(&__GcHeapStart, &__GcHeapEnd); - gc_add((void *)PSRAM_LOCATION, (void *)(PSRAM_LOCATION + psram_size)); + gc_add(&__PsramGcHeapStart, &__PsramGcHeapEnd + psram_additional_size); #else - gc_init((void *)PSRAM_LOCATION, (void *)(PSRAM_LOCATION + psram_size)); + gc_init(&__PsramGcHeapStart, &__PsramGcHeapEnd + psram_additional_size); #endif } else { gc_init(&__GcHeapStart, &__GcHeapEnd); diff --git a/ports/rp2/memmap_mp_rp2350_psram.ld b/ports/rp2/memmap_mp_rp2350_psram.ld new file mode 100644 index 0000000000000..c63572bbcb5d3 --- /dev/null +++ b/ports/rp2/memmap_mp_rp2350_psram.ld @@ -0,0 +1,322 @@ +/* Based on GCC ARM embedded samples. 
+ Defines the following symbols for use by code: + __exidx_start + __exidx_end + __etext + __data_start__ + __preinit_array_start + __preinit_array_end + __init_array_start + __init_array_end + __fini_array_start + __fini_array_end + __data_end__ + __bss_start__ + __bss_end__ + __end__ + end + __HeapLimit + __StackLimit + __StackTop + __stack (== StackTop) +*/ + +MEMORY +{ + FLASH(rx) : ORIGIN = 0x10000000, LENGTH = 4096k + RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 512k + SCRATCH_X(rwx) : ORIGIN = 0x20080000, LENGTH = 4k + SCRATCH_Y(rwx) : ORIGIN = 0x20081000, LENGTH = 4k + PSRAM(rw) : ORIGIN = 0x11000000, LENGTH = 2048k +} + +ENTRY(_entry_point) + +SECTIONS +{ + .flash_begin : { + __flash_binary_start = .; + } > FLASH + + /* The bootrom will enter the image at the point indicated in your + IMAGE_DEF, which is usually the reset handler of your vector table. + + The debugger will use the ELF entry point, which is the _entry_point + symbol, and in our case is *different from the bootrom's entry point.* + This is used to go back through the bootrom on debugger launches only, + to perform the same initial flash setup that would be performed on a + cold boot. + */ + + .text : { + __logical_binary_start = .; + KEEP (*(.vectors)) + KEEP (*(.binary_info_header)) + __binary_info_header_end = .; + KEEP (*(.embedded_block)) + __embedded_block_end = .; + KEEP (*(.reset)) + /* TODO revisit this now memset/memcpy/float in ROM */ + /* bit of a hack right now to exclude all floating point and time critical (e.g. memset, memcpy) code from + * FLASH ... we will include any thing excluded here in .data below by default */ + *(.init) + *libgcc.a:cmse_nonsecure_call.o + /* Change for MicroPython... 
exclude gc.c, parse.c, vm.c from flash */ + *(EXCLUDE_FILE(*libgcc.a: *libc.a:*lib_a-mem*.o *libm.a: *gc.c.obj *vm.c.obj *parse.c.obj) .text*) + *(.fini) + /* Pull all c'tors into .text */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + /* Followed by destructors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(SORT(.preinit_array.*))) + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + *(SORT(.fini_array.*)) + *(.fini_array) + PROVIDE_HIDDEN (__fini_array_end = .); + *(.eh_frame*) + . = ALIGN(4); + } > FLASH + + /* Note the boot2 section is optional, and should be discarded if there is + no reference to it *inside* the binary, as it is not called by the + bootrom. (The bootrom performs a simple best-effort XIP setup and + leaves it to the binary to do anything more sophisticated.) However + there is still a size limit of 256 bytes, to ensure the boot2 can be + stored in boot RAM. + + Really this is a "XIP setup function" -- the name boot2 is historic and + refers to its dual-purpose on RP2040, where it also handled vectoring + from the bootrom into the user image. + */ + + .boot2 : { + __boot2_start__ = .; + *(.boot2) + __boot2_end__ = .; + } > FLASH + + ASSERT(__boot2_end__ - __boot2_start__ <= 256, + "ERROR: Pico second stage bootloader must be no more than 256 bytes in size") + + .rodata : { + *(EXCLUDE_FILE(*libgcc.a: *libc.a:*lib_a-mem*.o *libm.a:) .rodata*) + *(.srodata*) + . = ALIGN(4); + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*))) + . 
= ALIGN(4); + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + /* Machine inspectable binary information */ + . = ALIGN(4); + __binary_info_start = .; + .binary_info : + { + KEEP(*(.binary_info.keep.*)) + *(.binary_info.*) + } > FLASH + __binary_info_end = .; + . = ALIGN(4); + + .ram_vector_table (NOLOAD): { + *(.ram_vector_table) + } > RAM + + .uninitialized_data (NOLOAD): { + . = ALIGN(4); + *(.uninitialized_data*) + } > RAM + + .data : { + __data_start__ = .; + *(vtable) + + *(.time_critical*) + + /* remaining .text and .rodata; i.e. stuff we exclude above because we want it in RAM */ + *(.text*) + . = ALIGN(4); + *(.rodata*) + . = ALIGN(4); + + *(.data*) + *(.sdata*) + + . = ALIGN(4); + *(.after_data.*) + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__mutex_array_start = .); + KEEP(*(SORT(.mutex_array.*))) + KEEP(*(.mutex_array)) + PROVIDE_HIDDEN (__mutex_array_end = .); + + *(.jcr) + . = ALIGN(4); + } > RAM AT> FLASH + + .tdata : { + . = ALIGN(4); + *(.tdata .tdata.* .gnu.linkonce.td.*) + /* All data end */ + __tdata_end = .; + } > RAM AT> FLASH + PROVIDE(__data_end__ = .); + + /* __etext is (for backwards compatibility) the name of the .data init source pointer (...) */ + __etext = LOADADDR(.data); + + .tbss (NOLOAD) : { + . = ALIGN(4); + __bss_start__ = .; + __tls_base = .; + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + + __tls_end = .; + } > RAM + + .bss (NOLOAD) : { + . = ALIGN(4); + __tbss_end = .; + + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*))) + *(COMMON) + PROVIDE(__global_pointer$ = . + 2K); + *(.sbss*) + . 
= ALIGN(4); + __bss_end__ = .; + } > RAM + + .heap (NOLOAD): + { + __end__ = .; + end = __end__; + KEEP(*(.heap*)) + /* historically on GCC sbrk was growing past __HeapLimit to __StackLimit, however + to be more compatible, we now set __HeapLimit explicitly to where the end of the heap is */ + /* Change for MicroPython: don't include this, it increases reported firmware size. */ + /* . = ORIGIN(RAM) + LENGTH(RAM); */ + __HeapLimit = .; + } > RAM + + /* Start and end symbols must be word-aligned */ + .scratch_x : { + __scratch_x_start__ = .; + *(.scratch_x.*) + . = ALIGN(4); + __scratch_x_end__ = .; + } > SCRATCH_X AT > FLASH + __scratch_x_source__ = LOADADDR(.scratch_x); + + .scratch_y : { + __scratch_y_start__ = .; + *(.scratch_y.*) + . = ALIGN(4); + __scratch_y_end__ = .; + } > SCRATCH_Y AT > FLASH + __scratch_y_source__ = LOADADDR(.scratch_y); + + /* .stack*_dummy section doesn't contain any symbols. It is only + * used for linker to calculate size of stack sections, and assign + * values to stack symbols later + * + * stack1 section may be empty/missing if platform_launch_core1 is not used */ + + /* by default we put core 0 stack at the end of scratch Y, so that if core 1 + * stack is not used then all of SCRATCH_X is free. + */ + .stack1_dummy (NOLOAD): + { + *(.stack1*) + } > SCRATCH_X + .stack_dummy (NOLOAD): + { + KEEP(*(.stack*)) + } > SCRATCH_Y + + .flash_end : { + KEEP(*(.embedded_end_block*)) + PROVIDE(__flash_binary_end = .); + } > FLASH =0xaa + + /* PSRAM data section */ + .psram_data (NOLOAD): { + .
= ALIGN(4); + *(.psram_data*) + PROVIDE(__psram_data_end = .); + } > PSRAM + + /* stack limit is poorly named, but historically is maximum heap ptr */ + __StackLimit = __bss_end__ + __micropy_c_heap_size__; + + /* Define start and end of internal RAM GC heap */ + __GcHeapStart = __StackLimit; /* after the C heap (sbrk limit) */ + __GcHeapEnd = ORIGIN(RAM) + LENGTH(RAM) - __micropy_extra_stack__; + + /* Define start and end of PSRAM GC heap */ + __PsramGcHeapStart = __psram_data_end; /* after the .psram_data section */ + __PsramGcHeapEnd = ORIGIN(PSRAM) + LENGTH(PSRAM); + + /* Define start and end of C stack */ + __StackTop = ORIGIN(SCRATCH_Y) + LENGTH(SCRATCH_Y); + __StackBottom = __GcHeapEnd; + PROVIDE(__stack = __StackTop); + + /* picolibc and LLVM */ + PROVIDE (__heap_start = __end__); + PROVIDE (__heap_end = __HeapLimit); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __tls_size_align = (__tls_size + __tls_align - 1) & ~(__tls_align - 1)); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + + /* llvm-libc */ + PROVIDE (_end = __end__); + PROVIDE (__llvm_libc_heap_limit = __HeapLimit); + + /* Ensure internal RAM didn't overflow */ + ASSERT((__GcHeapEnd - __GcHeapStart) > 0, "Main RAM overflow") + + /* Check PSRAM GC heap is at least 128kB */ + ASSERT((__PsramGcHeapEnd - __PsramGcHeapStart) > 128*1024, "GcHeap is too small") + + ASSERT( __binary_info_header_end - __logical_binary_start <= 1024, "Binary info must be in first 1024 bytes of the binary") + ASSERT( __embedded_block_end - __logical_binary_start <= 4096, "Embedded block must be in first 4096 bytes of the binary") + + /* todo assert on extra code */ +} diff --git a/ports/rp2/rp2_psram.h b/ports/rp2/rp2_psram.h index 718e7a1ea9e76..29ef54f8a3617 100644 --- a/ports/rp2/rp2_psram.h +++ b/ports/rp2/rp2_psram.h @@ -3,8 +3,6 @@ #ifndef MICROPY_INCLUDED_RP2_MACHINE_PSRAM_H #define MICROPY_INCLUDED_RP2_MACHINE_PSRAM_H -#define PSRAM_LOCATION _u(0x11000000) - extern size_t
psram_init(uint cs_pin); #endif From b69224dcc72aba7184904d7b03f5dbe5b099b3fc Mon Sep 17 00:00:00 2001 From: Mike Bell Date: Fri, 6 Dec 2024 15:46:35 +0000 Subject: [PATCH 17/17] ports/rp2: Use cache clean from pico-sdk. Signed-off-by: Mike Bell --- ports/rp2/CMakeLists.txt | 21 ++++++++++++--------- ports/rp2/rp2_flash.c | 11 ++++------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/ports/rp2/CMakeLists.txt b/ports/rp2/CMakeLists.txt index b2ab98ddb2430..3f17f2265d837 100644 --- a/ports/rp2/CMakeLists.txt +++ b/ports/rp2/CMakeLists.txt @@ -173,15 +173,6 @@ set(MICROPY_SOURCE_PORT ${CMAKE_BINARY_DIR}/pins_${MICROPY_BOARD}.c ) -if(MICROPY_HW_ENABLE_PSRAM) - list(APPEND MICROPY_SOURCE_PORT - rp2_psram.c - ) - target_compile_definitions(${MICROPY_TARGET} PRIVATE - MICROPY_HW_ENABLE_PSRAM=1 - ) -endif() - set(MICROPY_SOURCE_QSTR ${MICROPY_SOURCE_PY} ${MICROPY_DIR}/shared/readline/readline.c @@ -259,6 +250,18 @@ elseif(PICO_RISCV) ) endif() +if(MICROPY_HW_ENABLE_PSRAM) + list(APPEND MICROPY_SOURCE_PORT + rp2_psram.c + ) + list(APPEND PICO_SDK_COMPONENTS + hardware_xip_cache + ) + target_compile_definitions(${MICROPY_TARGET} PRIVATE + MICROPY_HW_ENABLE_PSRAM=1 + ) +endif() + # Use our custom pico_float_micropython float implementation. This is needed for two reasons: # - to fix inf handling in pico-sdk's __wrap___aeabi_fadd(); # - so we can use our own libm functions, to fix inaccuracies in the pico-sdk versions. 
diff --git a/ports/rp2/rp2_flash.c b/ports/rp2/rp2_flash.c index f464c048b76e7..0cbe6d0fe8cc6 100644 --- a/ports/rp2/rp2_flash.c +++ b/ports/rp2/rp2_flash.c @@ -39,6 +39,9 @@ #else #include "hardware/structs/ssi.h" #endif +#if MICROPY_HW_ENABLE_PSRAM +#include "hardware/xip_cache.h" +#endif #define BLOCK_SIZE_BYTES (FLASH_SECTOR_SIZE) @@ -129,13 +132,7 @@ static uint32_t begin_critical_flash_section(void) { #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM // We're about to invalidate the XIP cache, clean it first to commit any dirty writes to PSRAM - // Use the upper 16k of the maintenance space (0x1bffc000 through 0x1bffffff) to workaround - // incorrect behaviour of the XIP clean operation, where it also alters the tag of the associated - // cache line: https://forums.raspberrypi.com/viewtopic.php?t=378249#p2263677 - volatile uint8_t *maintenance_ptr = (volatile uint8_t *)(XIP_SRAM_BASE + (XIP_MAINTENANCE_BASE - XIP_BASE)); - for (int i = 1; i < 16 * 1024; i += 8) { - maintenance_ptr[i] = 0; - } + xip_cache_clean_all(); #endif return state;