diff --git a/ports/rp2/CMakeLists.txt b/ports/rp2/CMakeLists.txt index 4baaf7debf6c5..3f17f2265d837 100644 --- a/ports/rp2/CMakeLists.txt +++ b/ports/rp2/CMakeLists.txt @@ -250,6 +250,18 @@ elseif(PICO_RISCV) ) endif() +if(MICROPY_HW_ENABLE_PSRAM) + list(APPEND MICROPY_SOURCE_PORT + rp2_psram.c + ) + list(APPEND PICO_SDK_COMPONENTS + hardware_xip_cache + ) + target_compile_definitions(${MICROPY_TARGET} PRIVATE + MICROPY_HW_ENABLE_PSRAM=1 + ) +endif() + # Use our custom pico_float_micropython float implementation. This is needed for two reasons: # - to fix inf handling in pico-sdk's __wrap___aeabi_fadd(); # - so we can use our own libm functions, to fix inaccuracies in the pico-sdk versions. @@ -591,7 +603,11 @@ if (PICO_ON_DEVICE AND NOT PICO_NO_FLASH AND NOT PICO_COPY_TO_RAM) if(PICO_RP2040) pico_set_linker_script(${MICROPY_TARGET} ${CMAKE_CURRENT_LIST_DIR}/memmap_mp_rp2040.ld) elseif(PICO_RP2350) - pico_set_linker_script(${MICROPY_TARGET} ${CMAKE_CURRENT_LIST_DIR}/memmap_mp_rp2350.ld) + if (MICROPY_HW_ENABLE_PSRAM) + pico_set_linker_script(${MICROPY_TARGET} ${CMAKE_CURRENT_LIST_DIR}/memmap_mp_rp2350_psram.ld) + else() + pico_set_linker_script(${MICROPY_TARGET} ${CMAKE_CURRENT_LIST_DIR}/memmap_mp_rp2350.ld) + endif() endif() endif() diff --git a/ports/rp2/main.c b/ports/rp2/main.c index d6bf448267152..b9a40667dd236 100644 --- a/ports/rp2/main.c +++ b/ports/rp2/main.c @@ -26,6 +26,8 @@ #include +#include "rp2_psram.h" +#include "rp2_flash.h" #include "py/compile.h" #include "py/cstack.h" #include "py/runtime.h" @@ -68,6 +70,7 @@ extern uint8_t __StackTop, __StackBottom; extern uint8_t __GcHeapStart, __GcHeapEnd; +extern uint8_t __PsramGcHeapStart, __PsramGcHeapEnd; // Embed version info in the binary in machine readable form bi_decl(bi_program_version_string(MICROPY_GIT_TAG)); @@ -93,6 +96,13 @@ int main(int argc, char **argv) { // Hook for setting up anything that needs to be super early in the bootup process. MICROPY_BOARD_STARTUP(); + // Set the flash divisor to an appropriate value + rp2_flash_set_timing(); + + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM + size_t psram_size = psram_init(MICROPY_HW_PSRAM_CS_PIN); + #endif + #if MICROPY_HW_ENABLE_UART_REPL bi_decl(bi_program_feature("UART REPL")) setup_default_uart(); @@ -120,7 +130,26 @@ int main(int argc, char **argv) { // Initialise stack extents and GC heap. mp_cstack_init_with_top(&__StackTop, &__StackTop - &__StackBottom); + + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM + if (psram_size) { + // Linker script assumes a 2MB PSRAM, increase the size accordingly. + size_t psram_additional_size = 0; + if (psram_size > 2 * 1024 * 1024) { + psram_additional_size = psram_size - 2 * 1024 * 1024; + } + #if MICROPY_GC_SPLIT_HEAP + gc_init(&__GcHeapStart, &__GcHeapEnd); + gc_add(&__PsramGcHeapStart, &__PsramGcHeapEnd + psram_additional_size); + #else + gc_init(&__PsramGcHeapStart, &__PsramGcHeapEnd + psram_additional_size); + #endif + } else { + gc_init(&__GcHeapStart, &__GcHeapEnd); + } + #else gc_init(&__GcHeapStart, &__GcHeapEnd); + #endif #if MICROPY_PY_LWIP // lwIP doesn't allow to reinitialise itself by subsequent calls to this function diff --git a/ports/rp2/memmap_mp_rp2350_psram.ld b/ports/rp2/memmap_mp_rp2350_psram.ld new file mode 100644 index 0000000000000..c63572bbcb5d3 --- /dev/null +++ b/ports/rp2/memmap_mp_rp2350_psram.ld @@ -0,0 +1,322 @@ +/* Based on GCC ARM embedded samples. + Defines the following symbols for use by code: + __exidx_start + __exidx_end + __etext + __data_start__ + __preinit_array_start + __preinit_array_end + __init_array_start + __init_array_end + __fini_array_start + __fini_array_end + __data_end__ + __bss_start__ + __bss_end__ + __end__ + end + __HeapLimit + __StackLimit + __StackTop + __stack (== StackTop) +*/ + +MEMORY +{ + FLASH(rx) : ORIGIN = 0x10000000, LENGTH = 4096k + RAM(rwx) : ORIGIN = 0x20000000, LENGTH = 512k + SCRATCH_X(rwx) : ORIGIN = 0x20080000, LENGTH = 4k + SCRATCH_Y(rwx) : ORIGIN = 0x20081000, LENGTH = 4k + PSRAM(rw) : ORIGIN = 0x11000000, LENGTH = 2048k +} + +ENTRY(_entry_point) + +SECTIONS +{ + .flash_begin : { + __flash_binary_start = .; + } > FLASH + + /* The bootrom will enter the image at the point indicated in your + IMAGE_DEF, which is usually the reset handler of your vector table. + + The debugger will use the ELF entry point, which is the _entry_point + symbol, and in our case is *different from the bootrom's entry point.* + This is used to go back through the bootrom on debugger launches only, + to perform the same initial flash setup that would be performed on a + cold boot. + */ + + .text : { + __logical_binary_start = .; + KEEP (*(.vectors)) + KEEP (*(.binary_info_header)) + __binary_info_header_end = .; + KEEP (*(.embedded_block)) + __embedded_block_end = .; + KEEP (*(.reset)) + /* TODO revisit this now memset/memcpy/float in ROM */ + /* bit of a hack right now to exclude all floating point and time critical (e.g. memset, memcpy) code from + * FLASH ... we will include any thing excluded here in .data below by default */ + *(.init) + *libgcc.a:cmse_nonsecure_call.o + /* Change for MicroPython... exclude gc.c, parse.c, vm.c from flash */ + *(EXCLUDE_FILE(*libgcc.a: *libc.a:*lib_a-mem*.o *libm.a: *gc.c.obj *vm.c.obj *parse.c.obj) .text*) + *(.fini) + /* Pull all c'tors into .text */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + /* Followed by destructors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(SORT(.preinit_array.*))) + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + *(SORT(.fini_array.*)) + *(.fini_array) + PROVIDE_HIDDEN (__fini_array_end = .); + *(.eh_frame*) + . = ALIGN(4); + } > FLASH + + /* Note the boot2 section is optional, and should be discarded if there is + no reference to it *inside* the binary, as it is not called by the + bootrom. (The bootrom performs a simple best-effort XIP setup and + leaves it to the binary to do anything more sophisticated.) However + there is still a size limit of 256 bytes, to ensure the boot2 can be + stored in boot RAM. + + Really this is a "XIP setup function" -- the name boot2 is historic and + refers to its dual-purpose on RP2040, where it also handled vectoring + from the bootrom into the user image. + */ + + .boot2 : { + __boot2_start__ = .; + *(.boot2) + __boot2_end__ = .; + } > FLASH + + ASSERT(__boot2_end__ - __boot2_start__ <= 256, + "ERROR: Pico second stage bootloader must be no more than 256 bytes in size") + + .rodata : { + *(EXCLUDE_FILE(*libgcc.a: *libc.a:*lib_a-mem*.o *libm.a:) .rodata*) + *(.srodata*) + . = ALIGN(4); + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.flashdata*))) + . = ALIGN(4); + } > FLASH + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > FLASH + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > FLASH + __exidx_end = .; + + /* Machine inspectable binary information */ + . = ALIGN(4); + __binary_info_start = .; + .binary_info : + { + KEEP(*(.binary_info.keep.*)) + *(.binary_info.*) + } > FLASH + __binary_info_end = .; + . = ALIGN(4); + + .ram_vector_table (NOLOAD): { + *(.ram_vector_table) + } > RAM + + .uninitialized_data (NOLOAD): { + . = ALIGN(4); + *(.uninitialized_data*) + } > RAM + + .data : { + __data_start__ = .; + *(vtable) + + *(.time_critical*) + + /* remaining .text and .rodata; i.e. stuff we exclude above because we want it in RAM */ + *(.text*) + . = ALIGN(4); + *(.rodata*) + . = ALIGN(4); + + *(.data*) + *(.sdata*) + + . = ALIGN(4); + *(.after_data.*) + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__mutex_array_start = .); + KEEP(*(SORT(.mutex_array.*))) + KEEP(*(.mutex_array)) + PROVIDE_HIDDEN (__mutex_array_end = .); + + *(.jcr) + . = ALIGN(4); + } > RAM AT> FLASH + + .tdata : { + . = ALIGN(4); + *(.tdata .tdata.* .gnu.linkonce.td.*) + /* All data end */ + __tdata_end = .; + } > RAM AT> FLASH + PROVIDE(__data_end__ = .); + + /* __etext is (for backwards compatibility) the name of the .data init source pointer (...) */ + __etext = LOADADDR(.data); + + .tbss (NOLOAD) : { + . = ALIGN(4); + __bss_start__ = .; + __tls_base = .; + *(.tbss .tbss.* .gnu.linkonce.tb.*) + *(.tcommon) + + __tls_end = .; + } > RAM + + .bss (NOLOAD) : { + . = ALIGN(4); + __tbss_end = .; + + *(SORT_BY_ALIGNMENT(SORT_BY_NAME(.bss*))) + *(COMMON) + PROVIDE(__global_pointer$ = . + 2K); + *(.sbss*) + . = ALIGN(4); + __bss_end__ = .; + } > RAM + + .heap (NOLOAD): + { + __end__ = .; + end = __end__; + KEEP(*(.heap*)) + /* historically on GCC sbrk was growing past __HeapLimit to __StackLimit, however + to be more compatible, we now set __HeapLimit explicitly to where the end of the heap is */ + /* Change for MicroPython: don't include this, it increases reported firmware size. + /* . = ORIGIN(RAM) + LENGTH(RAM); */ + __HeapLimit = .; + } > RAM + + /* Start and end symbols must be word-aligned */ + .scratch_x : { + __scratch_x_start__ = .; + *(.scratch_x.*) + . = ALIGN(4); + __scratch_x_end__ = .; + } > SCRATCH_X AT > FLASH + __scratch_x_source__ = LOADADDR(.scratch_x); + + .scratch_y : { + __scratch_y_start__ = .; + *(.scratch_y.*) + . = ALIGN(4); + __scratch_y_end__ = .; + } > SCRATCH_Y AT > FLASH + __scratch_y_source__ = LOADADDR(.scratch_y); + + /* .stack*_dummy section doesn't contains any symbols. It is only + * used for linker to calculate size of stack sections, and assign + * values to stack symbols later + * + * stack1 section may be empty/missing if platform_launch_core1 is not used */ + + /* by default we put core 0 stack at the end of scratch Y, so that if core 1 + * stack is not used then all of SCRATCH_X is free. + */ + .stack1_dummy (NOLOAD): + { + *(.stack1*) + } > SCRATCH_X + .stack_dummy (NOLOAD): + { + KEEP(*(.stack*)) + } > SCRATCH_Y + + .flash_end : { + KEEP(*(.embedded_end_block*)) + PROVIDE(__flash_binary_end = .); + } > FLASH =0xaa + + /* PSRAM data section */ + .psram_data (NOLOAD): { + . = ALIGN(4); + *(.psram_data*) + PROVIDE(__psram_data_end = .); + } > PSRAM + + /* stack limit is poorly named, but historically is maximum heap ptr */ + __StackLimit = __bss_end__ + __micropy_c_heap_size__; + + /* Define start and end of internal RAM GC heap */ + __GcHeapStart = __StackLimit; /* after the C heap (sbrk limit) */ + __GcHeapEnd = ORIGIN(RAM) + LENGTH(RAM) - __micropy_extra_stack__; + + /* Define start and end of PSRAM GC heap */ + __PsramGcHeapStart = __psram_data_end; /* after the C heap (sbrk limit) */ + __PsramGcHeapEnd = ORIGIN(PSRAM) + LENGTH(PSRAM); + + /* Define start and end of C stack */ + __StackTop = ORIGIN(SCRATCH_Y) + LENGTH(SCRATCH_Y); + __StackBottom = __GcHeapEnd; + PROVIDE(__stack = __StackTop); + + /* picolibc and LLVM */ + PROVIDE (__heap_start = __end__); + PROVIDE (__heap_end = __HeapLimit); + PROVIDE( __tls_align = MAX(ALIGNOF(.tdata), ALIGNOF(.tbss)) ); + PROVIDE( __tls_size_align = (__tls_size + __tls_align - 1) & ~(__tls_align - 1)); + PROVIDE( __arm32_tls_tcb_offset = MAX(8, __tls_align) ); + + /* llvm-libc */ + PROVIDE (_end = __end__); + PROVIDE (__llvm_libc_heap_limit = __HeapLimit); + + /* Ensure internal RAM didn't overflow */ + ASSERT((__GcHeapEnd - __GcHeapStart) > 0, "Main RAM overflow") + + /* Check GC heap is at least 128kB */ + ASSERT((__PsramGcHeapEnd - __PsramGcHeapStart) > 128*1024, "GcHeap is too small") + + ASSERT( __binary_info_header_end - __logical_binary_start <= 1024, "Binary info must be in first 1024 bytes of the binary") + ASSERT( __embedded_block_end - __logical_binary_start <= 4096, "Embedded block must be in first 4096 bytes of the binary") + + /* todo assert on extra code */ +} diff --git a/ports/rp2/modmachine.c b/ports/rp2/modmachine.c index 2faf0bc6f8713..7e9881b6c5206 100644 --- a/ports/rp2/modmachine.c +++ b/ports/rp2/modmachine.c @@ -31,6 +31,8 @@ #include "mp_usbd.h" #include "modmachine.h" #include "uart.h" +#include "rp2_psram.h" +#include "rp2_flash.h" #include "clocks_extra.h" #include "hardware/pll.h" #include "hardware/structs/rosc.h" @@ -94,6 +96,11 @@ static mp_obj_t mp_machine_get_freq(void) { static void mp_machine_set_freq(size_t n_args, const mp_obj_t *args) { mp_int_t freq = mp_obj_get_int(args[0]); + + // If necessary, increase the flash divider before increasing the clock speed + const int old_freq = clock_get_hz(clk_sys); + rp2_flash_set_timing_for_freq(MAX(freq, old_freq)); + if (!set_sys_clock_khz(freq / 1000, false)) { mp_raise_ValueError(MP_ERROR_TEXT("cannot change frequency")); } @@ -111,10 +118,19 @@ static void mp_machine_set_freq(size_t n_args, const mp_obj_t *args) { } } } + + // If clock speed was reduced, maybe we can reduce the flash divider + if (freq < old_freq) { + rp2_flash_set_timing_for_freq(freq); + } + #if MICROPY_HW_ENABLE_UART_REPL setup_default_uart(); mp_uart_init(); #endif + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM + psram_init(MICROPY_HW_PSRAM_CS_PIN); + #endif } static void mp_machine_idle(void) { diff --git a/ports/rp2/mpconfigport.h b/ports/rp2/mpconfigport.h index 9a11e6048c6b4..1a4fa6fb5c7f0 100644 --- a/ports/rp2/mpconfigport.h +++ b/ports/rp2/mpconfigport.h @@ -71,8 +71,20 @@ #define MICROPY_CONFIG_ROM_LEVEL (MICROPY_CONFIG_ROM_LEVEL_EXTRA_FEATURES) #endif +#ifndef MICROPY_HW_ENABLE_PSRAM +#define MICROPY_HW_ENABLE_PSRAM (0) +#endif + // Memory allocation policies +#if MICROPY_HW_ENABLE_PSRAM +#define MICROPY_GC_STACK_ENTRY_TYPE uint32_t +#define MICROPY_ALLOC_GC_STACK_SIZE (1024) // Avoid slowdown when GC stack overflow causes a full sweep of PSRAM-backed heap +#else #define MICROPY_GC_STACK_ENTRY_TYPE uint16_t +#endif +#ifndef MICROPY_GC_SPLIT_HEAP +#define MICROPY_GC_SPLIT_HEAP MICROPY_HW_ENABLE_PSRAM // whether PSRAM is added to or replaces the heap +#endif #define MICROPY_ALLOC_PATH_MAX (128) #define MICROPY_QSTR_BYTES_IN_HASH (1) diff --git a/ports/rp2/rp2_flash.c b/ports/rp2/rp2_flash.c index c1acb54e75748..0cbe6d0fe8cc6 100644 --- a/ports/rp2/rp2_flash.c +++ b/ports/rp2/rp2_flash.c @@ -26,12 +26,22 @@ #include +#include "rp2_psram.h" #include "py/mphal.h" #include "py/runtime.h" #include "extmod/vfs.h" #include "modrp2.h" #include "hardware/flash.h" #include "pico/binary_info.h" +#ifdef PICO_RP2350 +#include "hardware/structs/ioqspi.h" +#include "hardware/structs/qmi.h" +#else +#include "hardware/structs/ssi.h" +#endif +#if MICROPY_HW_ENABLE_PSRAM +#include "hardware/xip_cache.h" +#endif #define BLOCK_SIZE_BYTES (FLASH_SECTOR_SIZE) @@ -70,16 +80,69 @@ bi_decl(bi_block_device( BINARY_INFO_BLOCK_DEV_FLAG_WRITE | BINARY_INFO_BLOCK_DEV_FLAG_PT_UNKNOWN)); +// Function to set the flash divisor to the correct divisor, assumes interrupts disabled +// and core1 locked out if relevant. +static void __no_inline_not_in_flash_func(rp2_flash_set_timing_internal)(int clock_hz) { + + // Use the minimum divisor assuming a 133MHz flash. + const int max_flash_freq = 133000000; + int divisor = (clock_hz + max_flash_freq - 1) / max_flash_freq; + + #if PICO_RP2350 + // Make sure flash is deselected - QMI doesn't appear to have a busy flag(!) + while ((ioqspi_hw->io[1].status & IO_QSPI_GPIO_QSPI_SS_STATUS_OUTTOPAD_BITS) != IO_QSPI_GPIO_QSPI_SS_STATUS_OUTTOPAD_BITS) { + ; + } + + // RX delay equal to the divisor means sampling at the same time as the next falling edge of SCK after the + // falling edge that generated the data. This is pretty tight at 133MHz but seems to work with the Winbond flash chips. + const int rxdelay = divisor; + qmi_hw->m[0].timing = (1 << QMI_M0_TIMING_COOLDOWN_LSB) | + rxdelay << QMI_M1_TIMING_RXDELAY_LSB | + divisor << QMI_M1_TIMING_CLKDIV_LSB; + + // Force a read through XIP to ensure the timing is applied + volatile uint32_t *ptr = (volatile uint32_t *)0x14000000; + (void)*ptr; + #else + // RP2040 SSI hardware only supports even divisors + if (divisor & 1) { + divisor += 1; + } + + // Wait for SSI not busy + while (ssi_hw->sr & SSI_SR_BUSY_BITS) { + ; + } + + // Disable, set the new divisor, and re-enable + hw_clear_bits(&ssi_hw->ssienr, SSI_SSIENR_SSI_EN_BITS); + ssi_hw->baudr = divisor; + hw_set_bits(&ssi_hw->ssienr, SSI_SSIENR_SSI_EN_BITS); + #endif +} + // Flash erase and write must run with interrupts disabled and the other core suspended, // because the XIP bit gets disabled. static uint32_t begin_critical_flash_section(void) { if (multicore_lockout_victim_is_initialized(1 - get_core_num())) { multicore_lockout_start_blocking(); } - return save_and_disable_interrupts(); + uint32_t state = save_and_disable_interrupts(); + + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM + // We're about to invalidate the XIP cache, clean it first to commit any dirty writes to PSRAM + xip_cache_clean_all(); + #endif + + return state; } static void end_critical_flash_section(uint32_t state) { + rp2_flash_set_timing_internal(clock_get_hz(clk_sys)); + #if defined(MICROPY_HW_PSRAM_CS_PIN) && MICROPY_HW_ENABLE_PSRAM + psram_init(MICROPY_HW_PSRAM_CS_PIN); + #endif restore_interrupts(state); if (multicore_lockout_victim_is_initialized(1 - get_core_num())) { multicore_lockout_end_blocking(); @@ -145,11 +208,16 @@ static mp_obj_t rp2_flash_readblocks(size_t n_args, const mp_obj_t *args) { } static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(rp2_flash_readblocks_obj, 3, 4, rp2_flash_readblocks); +static inline size_t min_size(size_t a, size_t b) { + return a < b ? a : b; +} + static mp_obj_t rp2_flash_writeblocks(size_t n_args, const mp_obj_t *args) { rp2_flash_obj_t *self = MP_OBJ_TO_PTR(args[0]); uint32_t offset = mp_obj_get_int(args[1]) * BLOCK_SIZE_BYTES; mp_buffer_info_t bufinfo; mp_get_buffer_raise(args[2], &bufinfo, MP_BUFFER_READ); + if (n_args == 3) { mp_uint_t atomic_state = begin_critical_flash_section(); flash_range_erase(self->flash_base + offset, bufinfo.len); @@ -159,10 +227,31 @@ static mp_obj_t rp2_flash_writeblocks(size_t n_args, const mp_obj_t *args) { } else { offset += mp_obj_get_int(args[3]); } - mp_uint_t atomic_state = begin_critical_flash_section(); - flash_range_program(self->flash_base + offset, bufinfo.buf, bufinfo.len); - end_critical_flash_section(atomic_state); - mp_event_handle_nowait(); + + if ((uintptr_t)bufinfo.buf >= SRAM_BASE) { + mp_uint_t atomic_state = begin_critical_flash_section(); + flash_range_program(self->flash_base + offset, bufinfo.buf, bufinfo.len); + end_critical_flash_section(atomic_state); + mp_event_handle_nowait(); + } else { + size_t bytes_left = bufinfo.len; + size_t bytes_offset = 0; + static uint8_t copy_buffer[BLOCK_SIZE_BYTES] = {0}; + + while (bytes_left) { + memcpy(copy_buffer, bufinfo.buf + bytes_offset, min_size(bytes_left, BLOCK_SIZE_BYTES)); + mp_uint_t atomic_state = begin_critical_flash_section(); + flash_range_program(self->flash_base + offset + bytes_offset, copy_buffer, min_size(bytes_left, BLOCK_SIZE_BYTES)); + end_critical_flash_section(atomic_state); + bytes_offset += BLOCK_SIZE_BYTES; + if (bytes_left <= BLOCK_SIZE_BYTES) { + break; + } + bytes_left -= BLOCK_SIZE_BYTES; + mp_event_handle_nowait(); + } + } + // TODO check return value return mp_const_none; } @@ -210,3 +299,23 @@ MP_DEFINE_CONST_OBJ_TYPE( make_new, rp2_flash_make_new, locals_dict, &rp2_flash_locals_dict ); + +// Modify the flash timing. Ensure flash access is suspended while +// the timings are altered. +void rp2_flash_set_timing_for_freq(int clock_hz) { + if (multicore_lockout_victim_is_initialized(1 - get_core_num())) { + multicore_lockout_start_blocking(); + } + uint32_t state = save_and_disable_interrupts(); + + rp2_flash_set_timing_internal(clock_hz); + + restore_interrupts(state); + if (multicore_lockout_victim_is_initialized(1 - get_core_num())) { + multicore_lockout_end_blocking(); + } +} + +void rp2_flash_set_timing() { + rp2_flash_set_timing_for_freq(clock_get_hz(clk_sys)); +} diff --git a/ports/rp2/rp2_flash.h b/ports/rp2/rp2_flash.h new file mode 100644 index 0000000000000..d5cf3ba2acac0 --- /dev/null +++ b/ports/rp2/rp2_flash.h @@ -0,0 +1,7 @@ +#ifndef MICROPY_INCLUDED_RP2_MACHINE_FLASH_H +#define MICROPY_INCLUDED_RP2_MACHINE_FLASH_H + +extern void rp2_flash_set_timing_for_freq(int clock_hz); +extern void rp2_flash_set_timing(); + +#endif diff --git a/ports/rp2/rp2_psram.c b/ports/rp2/rp2_psram.c new file mode 100644 index 0000000000000..ae6a3fb0a562f --- /dev/null +++ b/ports/rp2/rp2_psram.c @@ -0,0 +1,167 @@ +#include "hardware/structs/ioqspi.h" +#include "hardware/structs/qmi.h" +#include "hardware/structs/xip_ctrl.h" +#include "hardware/clocks.h" +#include "hardware/sync.h" +#include "rp2_psram.h" + + +size_t __no_inline_not_in_flash_func(psram_detect)() { + int psram_size = 0; + + // Try and read the PSRAM ID via direct_csr. + qmi_hw->direct_csr = 30 << QMI_DIRECT_CSR_CLKDIV_LSB | QMI_DIRECT_CSR_EN_BITS; + + // Need to poll for the cooldown on the last XIP transfer to expire + // (via direct-mode BUSY flag) before it is safe to perform the first + // direct-mode operation + while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) { + } + + // Exit out of QMI in case we've inited already + qmi_hw->direct_csr |= QMI_DIRECT_CSR_ASSERT_CS1N_BITS; + + // Transmit as quad. + qmi_hw->direct_tx = QMI_DIRECT_TX_OE_BITS | QMI_DIRECT_TX_IWIDTH_VALUE_Q << QMI_DIRECT_TX_IWIDTH_LSB | 0xf5; + + while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) { + } + + (void)qmi_hw->direct_rx; + + qmi_hw->direct_csr &= ~(QMI_DIRECT_CSR_ASSERT_CS1N_BITS); + + // Read the id + qmi_hw->direct_csr |= QMI_DIRECT_CSR_ASSERT_CS1N_BITS; + uint8_t kgd = 0; + uint8_t eid = 0; + + for (size_t i = 0; i < 7; i++) + { + if (i == 0) { + qmi_hw->direct_tx = 0x9f; + } else { + qmi_hw->direct_tx = 0xff; + } + + while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_TXEMPTY_BITS) == 0) { + } + + while ((qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) != 0) { + } + + if (i == 5) { + kgd = qmi_hw->direct_rx; + } else if (i == 6) { + eid = qmi_hw->direct_rx; + } else { + (void)qmi_hw->direct_rx; + } + } + + // Disable direct csr. + qmi_hw->direct_csr &= ~(QMI_DIRECT_CSR_ASSERT_CS1N_BITS | QMI_DIRECT_CSR_EN_BITS); + + if (kgd == 0x5D) { + psram_size = 1024 * 1024; // 1 MiB + uint8_t size_id = eid >> 5; + if (eid == 0x26 || size_id == 2) { + psram_size *= 8; // 8 MiB + } else if (size_id == 0) { + psram_size *= 2; // 2 MiB + } else if (size_id == 1) { + psram_size *= 4; // 4 MiB + } + } + + return psram_size; +} + +size_t __no_inline_not_in_flash_func(psram_init)(uint cs_pin) { + gpio_set_function(cs_pin, GPIO_FUNC_XIP_CS1); + + uint32_t intr_stash = save_and_disable_interrupts(); + + size_t psram_size = psram_detect(); + + if (!psram_size) { + return 0; + } + + // Enable direct mode, PSRAM CS, clkdiv of 10. + qmi_hw->direct_csr = 10 << QMI_DIRECT_CSR_CLKDIV_LSB | \ + QMI_DIRECT_CSR_EN_BITS | \ + QMI_DIRECT_CSR_AUTO_CS1N_BITS; + while (qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) { + ; + } + + // Enable QPI mode on the PSRAM + const uint CMD_QPI_EN = 0x35; + qmi_hw->direct_tx = QMI_DIRECT_TX_NOPUSH_BITS | CMD_QPI_EN; + + while (qmi_hw->direct_csr & QMI_DIRECT_CSR_BUSY_BITS) { + ; + } + + // Set PSRAM timing for APS6404 + // + // Using an rxdelay equal to the divisor isn't enough when running the APS6404 close to 133MHz. + // So: don't allow running at divisor 1 above 100MHz (because delay of 2 would be too late), + // and add an extra 1 to the rxdelay if the divided clock is > 100MHz (i.e. sys clock > 200MHz). + const int max_psram_freq = 133000000; + const int clock_hz = clock_get_hz(clk_sys); + int divisor = (clock_hz + max_psram_freq - 1) / max_psram_freq; + if (divisor == 1 && clock_hz > 100000000) { + divisor = 2; + } + int rxdelay = divisor; + if (clock_hz / divisor > 100000000) { + rxdelay += 1; + } + + // - Max select must be <= 8us. The value is given in multiples of 64 system clocks. + // - Min deselect must be >= 18ns. The value is given in system clock cycles - ceil(divisor / 2). + const int clock_period_fs = 1000000000000000ll / clock_hz; + const int max_select = (125 * 1000000) / clock_period_fs; // 125 = 8000ns / 64 + const int min_deselect = (18 * 1000000 + (clock_period_fs - 1)) / clock_period_fs - (divisor + 1) / 2; + + qmi_hw->m[1].timing = 1 << QMI_M1_TIMING_COOLDOWN_LSB | + QMI_M1_TIMING_PAGEBREAK_VALUE_1024 << QMI_M1_TIMING_PAGEBREAK_LSB | + max_select << QMI_M1_TIMING_MAX_SELECT_LSB | + min_deselect << QMI_M1_TIMING_MIN_DESELECT_LSB | + rxdelay << QMI_M1_TIMING_RXDELAY_LSB | + divisor << QMI_M1_TIMING_CLKDIV_LSB; + + // Set PSRAM commands and formats + qmi_hw->m[1].rfmt = + QMI_M0_RFMT_PREFIX_WIDTH_VALUE_Q << QMI_M0_RFMT_PREFIX_WIDTH_LSB | \ + QMI_M0_RFMT_ADDR_WIDTH_VALUE_Q << QMI_M0_RFMT_ADDR_WIDTH_LSB | \ + QMI_M0_RFMT_SUFFIX_WIDTH_VALUE_Q << QMI_M0_RFMT_SUFFIX_WIDTH_LSB | \ + QMI_M0_RFMT_DUMMY_WIDTH_VALUE_Q << QMI_M0_RFMT_DUMMY_WIDTH_LSB | \ + QMI_M0_RFMT_DATA_WIDTH_VALUE_Q << QMI_M0_RFMT_DATA_WIDTH_LSB | \ + QMI_M0_RFMT_PREFIX_LEN_VALUE_8 << QMI_M0_RFMT_PREFIX_LEN_LSB | \ + 6 << QMI_M0_RFMT_DUMMY_LEN_LSB; + + qmi_hw->m[1].rcmd = 0xEB; + + qmi_hw->m[1].wfmt = + QMI_M0_WFMT_PREFIX_WIDTH_VALUE_Q << QMI_M0_WFMT_PREFIX_WIDTH_LSB | \ + QMI_M0_WFMT_ADDR_WIDTH_VALUE_Q << QMI_M0_WFMT_ADDR_WIDTH_LSB | \ + QMI_M0_WFMT_SUFFIX_WIDTH_VALUE_Q << QMI_M0_WFMT_SUFFIX_WIDTH_LSB | \ + QMI_M0_WFMT_DUMMY_WIDTH_VALUE_Q << QMI_M0_WFMT_DUMMY_WIDTH_LSB | \ + QMI_M0_WFMT_DATA_WIDTH_VALUE_Q << QMI_M0_WFMT_DATA_WIDTH_LSB | \ + QMI_M0_WFMT_PREFIX_LEN_VALUE_8 << QMI_M0_WFMT_PREFIX_LEN_LSB; + + qmi_hw->m[1].wcmd = 0x38; + + // Disable direct mode + qmi_hw->direct_csr = 0; + + // Enable writes to PSRAM + hw_set_bits(&xip_ctrl_hw->ctrl, XIP_CTRL_WRITABLE_M1_BITS); + + restore_interrupts(intr_stash); + + return psram_size; +} diff --git a/ports/rp2/rp2_psram.h b/ports/rp2/rp2_psram.h new file mode 100644 index 0000000000000..29ef54f8a3617 --- /dev/null +++ b/ports/rp2/rp2_psram.h @@ -0,0 +1,8 @@ +#include "pico/stdlib.h" + +#ifndef MICROPY_INCLUDED_RP2_MACHINE_PSRAM_H +#define MICROPY_INCLUDED_RP2_MACHINE_PSRAM_H + +extern size_t psram_init(uint cs_pin); + +#endif