//
// Implementation of RP2040 stage 2 boot loader. This code is derived from the
// Winbond W25Q080 implementation (as found in the Pico) in the official Pico SDK.
//
// This implementation has been made 'stand-alone' by including necessary code /
// symbols from the included files in the reference implementation directly into
// the source. It has also been modified to include the conditional logic from
// the CircuitPython implementation that supports additional flash chips. The
// CircuitPython source is here:
// https://github.com/adafruit/circuitpython/blob/main/ports/raspberrypi/stage2.c.jinja
//
// This file cannot be assembled directly, instead assemble the board-specific file
// (such as pico-boot-stage2.S) which defines the parameters specific to the flash
// chip included on that board.
//
// Care has been taken to preserve ordering and it has been verified the generated
// binary is byte-for-byte identical to the reference code binary when assembled for
// the Pico.
//
// Note: the stage 2 boot loader must be 256 bytes in length and have a checksum
// present. In TinyGo, the linker script is responsible for allocating 256 bytes
// for the .boot2 section and the build logic patches the checksum into the
// binary after linking, controlled by the '.json' flag 'rp2040-boot-patch'.
//
// The stage 2 bootstrap section can be inspected in an elf file using this command:
// objdump -s -j .boot2 your_binary.elf
//
// Original Source:
// https://github.com/raspberrypi/pico-sdk/blob/master/src/rp2_common/boot_stage2/boot2_w25q080.S
//

// ----------------------------------------------------------------------------
// Second stage boot code
// Copyright (c) 2019-2021 Raspberry Pi (Trading) Ltd.
// SPDX-License-Identifier: BSD-3-Clause // // Device: Winbond W25Q080 // Also supports W25Q16JV (which has some different SR instructions) // Also supports AT25SF081 // Also supports S25FL132K0 // // Description: Configures W25Q080 to run in Quad I/O continuous read XIP mode // // Details: * Check status register 2 to determine if QSPI mode is enabled, // and perform an SR2 programming cycle if necessary. // * Use SSI to perform a dummy 0xEB read command, with the mode // continuation bits set, so that the flash will not require // 0xEB instruction prefix on subsequent reads. // * Configure SSI to write address, mode bits, but no instruction. // SSI + flash are now jointly in a state where continuous reads // can take place. // * Jump to exit pointer passed in via lr. Bootrom passes null, // in which case this code uses a default 256 byte flash offset // // Building: * This code must be position-independent, and use stack only // * The code will be padded to a size of 256 bytes, including a // 4-byte checksum. Therefore code size cannot exceed 252 bytes. 
// ----------------------------------------------------------------------------

//
// Expanded include files
//
// Flash command opcodes used by this loader.
#define CMD_WRITE_ENABLE 0x06
#define CMD_READ_STATUS 0x05
#define CMD_READ_STATUS2 0x35
#define CMD_WRITE_STATUS1 0x01
#define CMD_WRITE_STATUS2 0x31
#define SREG_DATA 0x02 // Enable quad-SPI mode

// Peripheral base addresses.
#define XIP_BASE 0x10000000
#define XIP_SSI_BASE 0x18000000
#define PADS_QSPI_BASE 0x40020000
#define PPB_BASE 0xe0000000

#define M0PLUS_VTOR_OFFSET 0x0000ed08

// QSPI pad control registers / fields.
#define PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB 4
#define PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS 0x00000001
#define PADS_QSPI_GPIO_QSPI_SCLK_OFFSET 0x00000004
#define PADS_QSPI_GPIO_QSPI_SD0_OFFSET 0x00000008
#define PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS 0x00000002
#define PADS_QSPI_GPIO_QSPI_SD1_OFFSET 0x0000000c
#define PADS_QSPI_GPIO_QSPI_SD2_OFFSET 0x00000010
#define PADS_QSPI_GPIO_QSPI_SD3_OFFSET 0x00000014

// SSI register offsets.
#define SSI_CTRLR0_OFFSET 0x00000000
#define SSI_CTRLR1_OFFSET 0x00000004
#define SSI_SSIENR_OFFSET 0x00000008
#define SSI_BAUDR_OFFSET 0x00000014
#define SSI_SR_OFFSET 0x00000028
#define SSI_DR0_OFFSET 0x00000060
#define SSI_RX_SAMPLE_DLY_OFFSET 0x000000f0

// SSI CTRLR0 fields.
#define SSI_CTRLR0_DFS_32_LSB 16
// Standard (single-bit) SPI frame format. Needed by the BOARD_QUAD_OK != 1
// configuration below; without it FRAME_FORMAT expands to an undefined symbol.
#define SSI_CTRLR0_SPI_FRF_VALUE_STD 0x0
#define SSI_CTRLR0_SPI_FRF_VALUE_QUAD 0x2
#define SSI_CTRLR0_SPI_FRF_LSB 21
#define SSI_CTRLR0_TMOD_VALUE_TX_AND_RX 0x0
#define SSI_CTRLR0_TMOD_VALUE_EEPROM_READ 0x3
#define SSI_CTRLR0_TMOD_LSB 8

// SSI SPI_CTRLR0 fields.
// Command and address both sent in standard SPI format. Needed by the
// BOARD_QUAD_OK != 1 configuration below (TRANSACTION_TYPE).
#define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C1A 0x0
#define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A 0x1
#define SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A 0x2
#define SSI_SPI_CTRLR0_OFFSET 0x000000f4
#define SSI_SPI_CTRLR0_INST_L_VALUE_NONE 0x0
#define SSI_SPI_CTRLR0_INST_L_VALUE_8B 0x2
#define SSI_SPI_CTRLR0_TRANS_TYPE_LSB 0
#define SSI_SPI_CTRLR0_ADDR_L_LSB 2
#define SSI_SPI_CTRLR0_INST_L_LSB 8
#define SSI_SPI_CTRLR0_WAIT_CYCLES_LSB 11
#define SSI_SPI_CTRLR0_XIP_CMD_LSB 24

// SSI status register bits.
#define SSI_SR_BUSY_BITS 0x00000001
#define SSI_SR_TFE_BITS 0x00000004

// ----------------------------------------------------------------------------
// Config section
// ----------------------------------------------------------------------------
// It should be possible to support most flash devices by modifying this section

// The serial flash interface will run at clk_sys/PICO_FLASH_SPI_CLKDIV.
// This must be a positive, even integer.
// The bootrom is very conservative with SPI frequency, but here we should be
// as aggressive as possible.

#define PICO_FLASH_SPI_CLKDIV BOARD_PICO_FLASH_SPI_CLKDIV
#if PICO_FLASH_SPI_CLKDIV & 1
#error PICO_FLASH_SPI_CLKDIV must be even
#endif
// Enforce the "positive" half of the requirement too. An undefined
// BOARD_PICO_FLASH_SPI_CLKDIV evaluates to 0 in #if and is caught here as well.
#if PICO_FLASH_SPI_CLKDIV < 2
#error PICO_FLASH_SPI_CLKDIV must be a positive, even integer
#endif

#if BOARD_QUAD_OK==1
// Define interface width: single/dual/quad IO
#define FRAME_FORMAT SSI_CTRLR0_SPI_FRF_VALUE_QUAD
#define TRANSACTION_TYPE SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_2C2A
// Note that the INST_L field is used to select what XIP data gets pushed into
// the TX FIFO:
//      INST_L_0_BITS   {ADDR[23:0],XIP_CMD[7:0]}       Load "mode bits" into XIP_CMD
//      Anything else   {XIP_CMD[7:0],ADDR[23:0]}       Load SPI command into XIP_CMD
#define INSTRUCTION_LENGTH SSI_SPI_CTRLR0_INST_L_VALUE_NONE
#define READ_INSTRUCTION MODE_CONTINUOUS_READ
#define ADDR_L 8 // 6 for address, 2 for mode
#else
// Plain serial SPI: every XIP access sends the 8-bit read command followed by
// a 24-bit address.
#define FRAME_FORMAT SSI_CTRLR0_SPI_FRF_VALUE_STD
#define TRANSACTION_TYPE SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C1A
#define INSTRUCTION_LENGTH SSI_SPI_CTRLR0_INST_L_VALUE_8B
#define READ_INSTRUCTION BOARD_CMD_READ
#define ADDR_L 6 // * 4 = 24
#endif

// The flash-chip specific read instruction
#define CMD_READ BOARD_CMD_READ

// "Mode bits" are 8 special bits sent immediately after
// the address bits in a "Read Data Fast Quad I/O" command sequence.
// On W25Q080, the four LSBs are don't care, and if MSBs == 0xa, the
// next read does not require the 0xeb instruction prefix.
#define MODE_CONTINUOUS_READ 0xa0

// How many clocks of Hi-Z following the mode bits. For W25Q080, 4 dummy cycles
// are required.
#define WAIT_CYCLES BOARD_WAIT_CYCLES

// If defined, we will read status reg, compare to SREG_DATA, and overwrite
// with our value if the SR doesn't match.
// We do a two-byte write to SR1 (01h cmd) rather than a one-byte write to
// SR2 (31h cmd) as the latter command isn't supported by WX25Q080.
// This isn't great because it will remove block protections.
// A better solution is to use a volatile SR write if your device supports it.
#define PROGRAM_STATUS_REG

.syntax unified
.cpu cortex-m0plus
.thumb

.section .boot2, "ax"

// -----------------------------------------------------------------------------
// _stage2_boot
//
// Configures the QSPI pads and the SSI so that the XIP window is served by the
// external flash, then leaves through check_return.
//
// In:    lr = exit pointer. It is pushed on entry and popped at check_return:
//             * lr == 0 : vector through the table at XIP_BASE + 0x100
//                         (the check_return comment attributes this case to
//                         entry from the bootrom).
//             * lr != 0 : treated as a return address -- second stage being
//                         called as a function by user code, after copying
//                         out of XIP region.
// Regs:  r3 holds the peripheral base (pads first, then SSI), r0...2 are used
//        as temporaries. Other GPRs not used.
// Stack: one word for the saved lr, plus what the helpers push.
// -----------------------------------------------------------------------------
.global _stage2_boot
.type _stage2_boot,%function
.thumb_func
_stage2_boot:
    push {lr}

    // Set pad configuration:
    // - SCLK 8mA drive, no slew limiting
    // - SDx disable input Schmitt to reduce delay

    ldr r3, =PADS_QSPI_BASE
    movs r0, #(2 << PADS_QSPI_GPIO_QSPI_SCLK_DRIVE_LSB | PADS_QSPI_GPIO_QSPI_SCLK_SLEWFAST_BITS)
    str r0, [r3, #PADS_QSPI_GPIO_QSPI_SCLK_OFFSET]
    // Read SD0's pad register, clear its Schmitt bit, and reuse that value for
    // the data pads written below.
    ldr r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
    movs r1, #PADS_QSPI_GPIO_QSPI_SD0_SCHMITT_BITS
    bics r0, r1
#if BOARD_QUAD_OK==1
    str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD0_OFFSET]
#endif
    str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD1_OFFSET]
#if BOARD_QUAD_OK==1
    str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD2_OFFSET]
    str r0, [r3, #PADS_QSPI_GPIO_QSPI_SD3_OFFSET]
#endif

    // From here on r3 = SSI base; the helpers below rely on that.
    ldr r3, =XIP_SSI_BASE

    // Disable SSI to allow further config
    movs r1, #0
    str r1, [r3, #SSI_SSIENR_OFFSET]

    // Set baud rate
    movs r1, #PICO_FLASH_SPI_CLKDIV
    str r1, [r3, #SSI_BAUDR_OFFSET]

    // Set 1-cycle sample delay. If PICO_FLASH_SPI_CLKDIV == 2 then this means,
    // if the flash launches data on SCLK posedge, we capture it at the time that
    // the next SCLK posedge is launched. This is shortly before that posedge
    // arrives at the flash, so data hold time should be ok. For
    // PICO_FLASH_SPI_CLKDIV > 2 this pretty much has no effect.

    movs r1, #1
    movs r2, #SSI_RX_SAMPLE_DLY_OFFSET  // == 0xf0 so need 8 bits of offset significance
    str r1, [r3, r2]

    // On QSPI parts we usually need a 01h SR-write command to enable QSPI mode
    // (i.e. turn WPn and HOLDn into IO2/IO3)
#ifdef PROGRAM_STATUS_REG
program_sregs:
#define CTRL0_SPI_TXRX \
    (7 << SSI_CTRLR0_DFS_32_LSB) | /* 8 bits per data frame */ \
    (SSI_CTRLR0_TMOD_VALUE_TX_AND_RX << SSI_CTRLR0_TMOD_LSB)

    ldr r1, =(CTRL0_SPI_TXRX)
    str r1, [r3, #SSI_CTRLR0_OFFSET]

    // Enable SSI and select slave 0
    movs r1, #1
    str r1, [r3, #SSI_SSIENR_OFFSET]

    // Check whether SR needs updating
#if BOARD_QUAD_OK==1
# if BOARD_QUAD_ENABLE_STATUS_BYTE==1
    // Status register 1 is read with CMD_READ_STATUS (05h), the same opcode the
    // write-completion poll below uses. This file defines no CMD_READ_STATUS1.
    movs r0, #CMD_READ_STATUS
# elif BOARD_QUAD_ENABLE_STATUS_BYTE==2
    movs r0, #CMD_READ_STATUS2
# endif

    bl read_flash_sreg
    // Whole-register equality test against the mask (not a bit test): any
    // other value causes the register to be rewritten with exactly the mask.
    movs r2, #BOARD_QUAD_ENABLE_BIT_MASK
    cmp r0, r2
    beq skip_sreg_programming

    // Send write enable command
    movs r1, #CMD_WRITE_ENABLE
    str r1, [r3, #SSI_DR0_OFFSET]

    // Poll for completion and discard RX
    bl wait_ssi_ready
    ldr r1, [r3, #SSI_DR0_OFFSET]

    // Send status write command followed by data bytes
# if BOARD_SPLIT_STATUS_WRITE==1
    // One-byte write addressed directly at the status byte holding the
    // quad-enable bit.
#  if BOARD_QUAD_ENABLE_STATUS_BYTE==1
    movs r1, #CMD_WRITE_STATUS1
#  elif BOARD_QUAD_ENABLE_STATUS_BYTE==2
    movs r1, #CMD_WRITE_STATUS2
#  endif
    str r1, [r3, #SSI_DR0_OFFSET]
    str r2, [r3, #SSI_DR0_OFFSET]

    bl wait_ssi_ready
    // Drain one RX entry per byte transmitted (two here).
    //ldr r1, [r3, #SSI_DR0_OFFSET]
    ldr r1, [r3, #SSI_DR0_OFFSET]
    ldr r1, [r3, #SSI_DR0_OFFSET]

# else
    // Write through the 01h command; when the quad-enable bit lives in status
    // byte 2, a zero byte for status byte 1 is sent first.
    movs r1, #CMD_WRITE_STATUS1
    str r1, [r3, #SSI_DR0_OFFSET]
#  if BOARD_QUAD_ENABLE_STATUS_BYTE==2
    movs r0, #0
    str r0, [r3, #SSI_DR0_OFFSET]
#  endif
    str r2, [r3, #SSI_DR0_OFFSET]

    bl wait_ssi_ready
    // Drain one RX entry per byte transmitted (two or three).
    ldr r1, [r3, #SSI_DR0_OFFSET]
    ldr r1, [r3, #SSI_DR0_OFFSET]
#  if BOARD_QUAD_ENABLE_STATUS_BYTE==2
    ldr r1, [r3, #SSI_DR0_OFFSET]
#  endif
# endif

    // Poll status register for write completion
    // (loop while bit 0 of the value read with CMD_READ_STATUS is set)
1:
    movs r0, #CMD_READ_STATUS
    bl read_flash_sreg
    movs r1, #1
    tst r0, r1
    bne 1b
#endif

skip_sreg_programming:
    // Disable SSI again so that it can be reconfigured
    movs r1, #0
    str r1, [r3, #SSI_SSIENR_OFFSET]
#endif

// Currently the flash expects an 8 bit serial command prefix on every
// transfer, which is a waste of cycles. Perform a dummy Fast Read Quad I/O
// command, with mode bits set such that the flash will not expect a serial
// command prefix on *subsequent* transfers. We don't care about the results
// of the read, the important part is the mode bits.

dummy_read:
#define CTRLR0_ENTER_XIP \
    (FRAME_FORMAT /* Quad I/O mode */ \
        << SSI_CTRLR0_SPI_FRF_LSB) | \
    (31 << SSI_CTRLR0_DFS_32_LSB) | /* 32 data bits */ \
    (SSI_CTRLR0_TMOD_VALUE_EEPROM_READ /* Send INST/ADDR, Receive Data */ \
        << SSI_CTRLR0_TMOD_LSB)

    ldr r1, =(CTRLR0_ENTER_XIP)
    str r1, [r3, #SSI_CTRLR0_OFFSET]

    movs r1, #0x0 // NDF=0 (single 32b read)
    str r1, [r3, #SSI_CTRLR1_OFFSET]

#if BOARD_QUAD_OK==1
#define SPI_CTRLR0_ENTER_XIP \
    (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | /* Address + mode bits */ \
    (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \
    (SSI_SPI_CTRLR0_INST_L_VALUE_8B \
        << SSI_SPI_CTRLR0_INST_L_LSB) | /* 8-bit instruction */ \
    (SSI_SPI_CTRLR0_TRANS_TYPE_VALUE_1C2A /* Send Command in serial mode then address in Quad I/O mode */ \
        << SSI_SPI_CTRLR0_TRANS_TYPE_LSB)

    ldr r1, =(SPI_CTRLR0_ENTER_XIP)
    ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET) // SPI_CTRL0 Register
    str r1, [r0]

    movs r1, #1 // Re-enable SSI
    str r1, [r3, #SSI_SSIENR_OFFSET]

    movs r1, #CMD_READ
    str r1, [r3, #SSI_DR0_OFFSET] // Push SPI command into TX FIFO
    movs r1, #MODE_CONTINUOUS_READ // 32-bit: 24 address bits (we don't care, so 0) and M[7:4]=1010
    str r1, [r3, #SSI_DR0_OFFSET] // Push Address into TX FIFO - this will trigger the transaction

    // Poll for completion
    bl wait_ssi_ready

    // The flash is in a state where we can blast addresses in parallel, and get
    // parallel data back. Now configure the SSI to translate XIP bus accesses
    // into QSPI transfers of this form.

    movs r1, #0
    str r1, [r3, #SSI_SSIENR_OFFSET] // Disable SSI (and clear FIFO) to allow further config
#endif

// Note that the INST_L field is used to select what XIP data gets pushed into
// the TX FIFO:
//      INST_L_0_BITS   {ADDR[23:0],XIP_CMD[7:0]}       Load "mode bits" into XIP_CMD
//      Anything else   {XIP_CMD[7:0],ADDR[23:0]}       Load SPI command into XIP_CMD
configure_ssi:
#define SPI_CTRLR0_XIP \
    (READ_INSTRUCTION /* Mode bits to keep flash in continuous read mode */ \
        << SSI_SPI_CTRLR0_XIP_CMD_LSB) | \
    (ADDR_L << SSI_SPI_CTRLR0_ADDR_L_LSB) | /* Total number of address + mode bits */ \
    (WAIT_CYCLES << SSI_SPI_CTRLR0_WAIT_CYCLES_LSB) | /* Hi-Z dummy clocks following address + mode */ \
    (INSTRUCTION_LENGTH /* Do not send a command, instead send XIP_CMD as mode bits after address */ \
        << SSI_SPI_CTRLR0_INST_L_LSB) | \
    (TRANSACTION_TYPE /* Send Address in Quad I/O mode (and Command but that is zero bits long) */ \
        << SSI_SPI_CTRLR0_TRANS_TYPE_LSB)

    ldr r1, =(SPI_CTRLR0_XIP)
    ldr r0, =(XIP_SSI_BASE + SSI_SPI_CTRLR0_OFFSET)
    str r1, [r0]

    movs r1, #1
    str r1, [r3, #SSI_SSIENR_OFFSET] // Re-enable SSI

// Bus accesses to the XIP window will now be transparently serviced by the
// external flash on cache miss. We are ready to run code from flash.

//
// Helper Includes
//

//
// #include "boot2_helpers/exit_from_boot2.S"
//

// If entered from the bootrom, lr (which we earlier pushed) will be 0,
// and we vector through the table at the start of the main flash image.
// Any regular function call will have a nonzero value for lr.
check_return:
    pop {r0}                    // r0 = lr as saved on entry
    cmp r0, #0
    beq vector_into_flash
    bx r0                       // nonzero: return to the caller
vector_into_flash:
    ldr r0, =(XIP_BASE + 0x100)
    ldr r1, =(PPB_BASE + M0PLUS_VTOR_OFFSET)
    str r0, [r1]                // VTOR = XIP_BASE + 0x100
    ldmia r0, {r0, r1}          // r0 = first table word, r1 = second table word
    msr msp, r0                 // first word becomes the main stack pointer
    bx r1                       // second word is the branch target

//
// #include "boot2_helpers/wait_ssi_ready.S"
//

// -----------------------------------------------------------------------------
// wait_ssi_ready
//
// Spins until the SSI status register reports TX FIFO empty and not busy.
//
// In:    r3 = SSI base
// Out:   none; r0 and r1 are saved and restored
// Clobb: flags
// -----------------------------------------------------------------------------
wait_ssi_ready:
    push {r0, r1, lr}

    // Command is complete when there is nothing left to send
    // (TX FIFO empty) and SSI is no longer busy (CSn deasserted)
1:
    ldr r1, [r3, #SSI_SR_OFFSET]
    movs r0, #SSI_SR_TFE_BITS
    tst r1, r0
    beq 1b                      // TFE clear: TX FIFO not yet empty
    movs r0, #SSI_SR_BUSY_BITS
    tst r1, r0
    bne 1b                      // BUSY set: transfer still in progress

    pop {r0, r1, pc}

#ifdef PROGRAM_STATUS_REG

//
// #include "boot2_helpers/read_flash_sreg.S"
//

// -----------------------------------------------------------------------------
// read_flash_sreg
//
// Pass status read cmd into r0.
// Returns status value in r0.
//
// In:    r0 = status-read command, r3 = SSI base
// Out:   r0 = second entry read back from the RX FIFO
// Clobb: flags (via wait_ssi_ready); r1 is saved and restored
// -----------------------------------------------------------------------------
.global read_flash_sreg
.type read_flash_sreg,%function
.thumb_func
read_flash_sreg:
    push {r1, lr}
    str r0, [r3, #SSI_DR0_OFFSET]
    // Dummy byte:
    str r0, [r3, #SSI_DR0_OFFSET]

    bl wait_ssi_ready
    // Discard the first RX entry (received while the command was being sent);
    // the second one is the status value returned to the caller.
    ldr r0, [r3, #SSI_DR0_OFFSET]
    ldr r0, [r3, #SSI_DR0_OFFSET]

    pop {r1, pc}

#endif

// Literal pool for the `ldr rX, =...` pseudo-instructions above.
.global literals
literals:
.ltorg

.end