3232#define ENABLE_BUS_CLK_CALC
3333
3434#ifndef BUILD_LOADER_STAGE1
35- /* TODO: Fix e6500 MP initialization - secondary cores not responding.
36- * Disable MP for now to focus on getting basic boot working. */
37- /* #define ENABLE_MP */ /* multi-core support */
35+ #define ENABLE_MP /* multi-core support */
3836#endif
3937
4038/* Forward declarations */
41- static void hal_flash_unlock_sector (uint32_t sector );
39+ static void RAMFUNCTION hal_flash_unlock_sector (uint32_t sector );
4240#ifdef ENABLE_MP
4341static void hal_mp_init (void );
4442#endif
@@ -202,24 +200,32 @@ static int hal_flash_getid(void)
/* Prepare the IFC NOR flash chip-select (CS0) so that program/erase
 * command cycles can reach the flash. Compiles to an empty function
 * when ENABLE_IFC is not defined. */
static void hal_flash_init(void)
{
#ifdef ENABLE_IFC
    /* IFC CS0 - NOR Flash
     * The CS0 base address, port size, AMASK, CSOR and FTIM are left
     * exactly as the boot ROM programmed them: this code executes from
     * flash (XIP), so those settings must NOT be reprogrammed here.
     *
     * The boot ROM may, however, leave IFC_CSPR_WP (write-protect) set.
     * That bit blocks all write cycles to the flash, which keeps AMD
     * command sequences (erase/program) from reaching the chips.
     * Clearing only the WP bit is safe during XIP: it does not change
     * chip-select decode, it only enables write forwarding. */
    const uint32_t cspr0 = get32(IFC_CSPR(0));
    const uint32_t wp_bit = cspr0 & IFC_CSPR_WP;

#ifdef DEBUG_UART
    wolfBoot_printf("IFC CSPR0: 0x%x%s\n", cspr0,
        (wp_bit != 0) ? " (WP set)" : "");
#endif

    /* Write CSPR0 back only when WP actually needs clearing */
    if (wp_bit != 0) {
        set32(IFC_CSPR(0), cspr0 & ~IFC_CSPR_WP);
    }

    /* Note: hal_flash_getid() stays disabled. AMD Autoselect mode affects
     * the entire flash bank, and wolfBoot runs XIP from that same bank
     * (CS0), so entering Autoselect mode crashes instruction fetch.
     * Flash write/erase use RAMFUNCTION to execute from DDR while the
     * flash is in command mode (after .ramcode relocation in hal_init). */
#endif /* ENABLE_IFC */
}
225231
@@ -630,10 +636,6 @@ void hal_init(void)
630636#endif
631637#endif /* ENABLE_CPLD */
632638
633- #ifdef ENABLE_MP
634- hal_mp_init ();
635- #endif
636-
637639#ifdef ENABLE_DDR
638640 /* Test DDR (when DEBUG_UART enabled) */
639641#ifdef DEBUG_UART
@@ -651,17 +653,78 @@ void hal_init(void)
651653 hal_reconfigure_cpc_as_cache ();
652654 hal_flash_enable_caching ();
653655#endif
656+
657+ #ifdef ENABLE_MP
658+ /* Start secondary cores AFTER CPC release and flash caching.
659+ * Secondary cores' L2 flash-invalidate on the shared cluster L2
660+ * must not disrupt the CPC SRAM→cache transition. Starting them
661+ * after ensures the cache hierarchy is fully stable. */
662+ hal_mp_init ();
663+ #endif
664+ }
665+
666+ /* RAM-resident microsecond delay using inline timebase reads.
667+ * Cannot call wait_ticks() (in flash .text) from RAMFUNCTION code
668+ * while flash is in command mode — instruction fetch would return garbage. */
669+ static void RAMFUNCTION ram_udelay (uint32_t delay_us )
670+ {
671+ uint32_t tbl_start , tbl_now ;
672+ uint32_t ticks = delay_us * DELAY_US ;
673+ __asm__ __volatile__("mfspr %0,268" : "=r" (tbl_start ));
674+ do {
675+ __asm__ __volatile__("mfspr %0,268" : "=r" (tbl_now ));
676+ } while ((tbl_now - tbl_start ) < ticks );
654677}
655678
656- static void hal_flash_unlock_sector (uint32_t sector )
679+ /* Switch flash TLB to cache-inhibited for direct flash chip access.
680+ * AMD flash commands require writes to reach the chip immediately and
681+ * status reads to come directly from the chip. With MAS2_M (cacheable),
682+ * writes are cached and never reach the flash, reads return stale data.
683+ * Uses direct SPR manipulation to avoid calling .text functions. */
684+ static void RAMFUNCTION hal_flash_cache_disable (void )
685+ {
686+ uint32_t mas2 ;
687+ /* Select TLB1, entry 2 (flash) */
688+ mtspr (MAS0 , BOOKE_MAS0 (1 , 2 , 0 ));
689+ __asm__ __volatile__("isync; tlbre; isync" );
690+ /* Change WIMGE from M to I|G */
691+ mas2 = mfspr (MAS2 );
692+ mas2 &= ~0x1F ; /* clear WIMGE bits */
693+ mas2 |= (MAS2_I | MAS2_G );
694+ mtspr (MAS2 , mas2 );
695+ __asm__ __volatile__("isync; msync; tlbwe; isync" );
696+ }
697+
698+ /* Restore flash TLB to cacheable mode after flash operation.
699+ * Flash is back in read-array mode, safe to cache again. */
700+ static void RAMFUNCTION hal_flash_cache_enable (void )
701+ {
702+ uint32_t mas2 ;
703+ /* Select TLB1, entry 2 (flash) */
704+ mtspr (MAS0 , BOOKE_MAS0 (1 , 2 , 0 ));
705+ __asm__ __volatile__("isync; tlbre; isync" );
706+ /* Change WIMGE from I|G to M (cacheable) */
707+ mas2 = mfspr (MAS2 );
708+ mas2 &= ~0x1F ;
709+ mas2 |= MAS2_M ;
710+ mtspr (MAS2 , mas2 );
711+ __asm__ __volatile__("isync; msync; tlbwe; isync" );
712+ /* Invalidate D-cache and I-cache — stale entries from before
713+ * the flash operation must be discarded */
714+ invalidate_dcache ();
715+ invalidate_icache ();
716+ }
717+
718+ static void RAMFUNCTION hal_flash_unlock_sector (uint32_t sector )
657719{
658720 /* AMD unlock sequence */
659721 FLASH_IO8_WRITE (sector , FLASH_UNLOCK_ADDR1 , AMD_CMD_UNLOCK_START );
660722 FLASH_IO8_WRITE (sector , FLASH_UNLOCK_ADDR2 , AMD_CMD_UNLOCK_ACK );
661723}
662724
663- /* wait for toggle to stop and status mask to be met within microsecond timeout */
664- static int hal_flash_status_wait (uint32_t sector , uint16_t mask ,
725+ /* wait for toggle to stop and status mask to be met within microsecond timeout.
726+ * RAMFUNCTION: executes from DDR while flash is in program/erase command mode. */
727+ static int RAMFUNCTION hal_flash_status_wait (uint32_t sector , uint16_t mask ,
665728 uint32_t timeout_us )
666729{
667730 int ret = 0 ;
@@ -682,7 +745,7 @@ static int hal_flash_status_wait(uint32_t sector, uint16_t mask,
682745 #endif
683746 if (read1 == read2 && ((read1 & mask ) == mask ))
684747 break ;
685- udelay (1 );
748+ ram_udelay (1 );
686749 } while (timeout ++ < timeout_us );
687750 if (timeout >= timeout_us ) {
688751 ret = -1 ; /* timeout */
@@ -694,7 +757,7 @@ static int hal_flash_status_wait(uint32_t sector, uint16_t mask,
694757 return ret ;
695758}
696759
697- int hal_flash_write (uint32_t address , const uint8_t * data , int len )
760+ int RAMFUNCTION hal_flash_write (uint32_t address , const uint8_t * data , int len )
698761{
699762 int ret = 0 ;
700763 uint32_t i , pos , sector , offset , xfer , nwords ;
@@ -708,6 +771,9 @@ int hal_flash_write(uint32_t address, const uint8_t *data, int len)
708771 data , address , len );
709772#endif
710773
774+ /* Disable flash caching — AMD commands must reach the chip directly */
775+ hal_flash_cache_disable ();
776+
711777 pos = 0 ;
712778 while (len > 0 ) {
713779 /* determine sector address */
@@ -754,10 +820,13 @@ int hal_flash_write(uint32_t address, const uint8_t *data, int len)
754820 address += xfer ;
755821 len -= xfer ;
756822 }
823+
824+ /* Restore flash caching — flash is back in read-array mode */
825+ hal_flash_cache_enable ();
757826 return ret ;
758827}
759828
760- int hal_flash_erase (uint32_t address , int len )
829+ int RAMFUNCTION hal_flash_erase (uint32_t address , int len )
761830{
762831 int ret = 0 ;
763832 uint32_t sector ;
@@ -766,6 +835,9 @@ int hal_flash_erase(uint32_t address, int len)
766835 if (address >= FLASH_BASE_ADDR )
767836 address -= FLASH_BASE_ADDR ;
768837
838+ /* Disable flash caching — AMD commands must reach the chip directly */
839+ hal_flash_cache_disable ();
840+
769841 while (len > 0 ) {
770842 /* determine sector address */
771843 sector = (address / FLASH_SECTOR_SIZE );
@@ -792,10 +864,13 @@ int hal_flash_erase(uint32_t address, int len)
792864 address += FLASH_SECTOR_SIZE ;
793865 len -= FLASH_SECTOR_SIZE ;
794866 }
867+
868+ /* Restore flash caching — flash is back in read-array mode */
869+ hal_flash_cache_enable ();
795870 return ret ;
796871}
797872
798- void hal_flash_unlock (void )
873+ void RAMFUNCTION hal_flash_unlock (void )
799874{
800875 /* Per-sector unlock is done in hal_flash_write/erase before each operation.
801876 * The previous non-volatile PPB protection mode (C0h) approach caused
@@ -818,8 +893,9 @@ extern uint32_t _spin_table[];
818893extern uint32_t _spin_table_addr ;
819894extern uint32_t _bootpg_addr ;
820895
821- /* Startup additional cores with spin table and synchronize the timebase */
822- static void hal_mp_up (uint32_t bootpg )
896+ /* Startup additional cores with spin table and synchronize the timebase.
897+ * spin_table_ddr: DDR address of the spin table (for checking status) */
898+ static void hal_mp_up (uint32_t bootpg , uint32_t spin_table_ddr )
823899{
824900 uint32_t all_cores , active_cores , whoami ;
825901 int timeout = 50 , i ;
@@ -829,7 +905,7 @@ static void hal_mp_up(uint32_t bootpg)
829905 active_cores = (1 << whoami ); /* current running cores */
830906
831907 wolfBoot_printf ("MP: Starting cores (boot page %p, spin table %p)\n" ,
832- bootpg , ( uint32_t ) _spin_table );
908+ bootpg , spin_table_ddr );
833909
834910 /* Set the boot page translation register */
835911 set32 (LCC_BSTRH , 0 );
@@ -849,8 +925,8 @@ static void hal_mp_up(uint32_t bootpg)
849925 /* wait for other core(s) to start */
850926 while (timeout ) {
851927 for (i = 0 ; i < CPU_NUMCORES ; i ++ ) {
852- uint32_t * entry = (uint32_t * )(
853- ( uint8_t * ) _spin_table + (i * ENTRY_SIZE ) + ENTRY_ADDR_LOWER );
928+ volatile uint32_t * entry = (volatile uint32_t * )(
929+ spin_table_ddr + (i * ENTRY_SIZE ) + ENTRY_ADDR_LOWER );
854930 if (* entry ) {
855931 active_cores |= (1 << i );
856932 }
@@ -881,7 +957,7 @@ static void hal_mp_up(uint32_t bootpg)
881957static void hal_mp_init (void )
882958{
883959 uint32_t * fixup = (uint32_t * )& _secondary_start_page ;
884- uint32_t bootpg ;
960+ uint32_t bootpg , second_half_ddr , spin_table_ddr ;
885961 int i_tlb = 0 ; /* always 0 */
886962 size_t i ;
887963 const volatile uint32_t * s ;
@@ -893,31 +969,60 @@ static void hal_mp_init(void)
893969 * size to ensure bootpg fits in 32 bits and is accessible. */
894970 bootpg = DDR_ADDRESS + 0x80000000UL - BOOT_ROM_SIZE ;
895971
896- /* Store the boot page address for use by additional CPU cores */
897- _bootpg_addr = (uint32_t )& _second_half_boot_page ;
972+ /* Second half boot page (spin loop + spin table) goes just below.
973+ * For XIP flash builds, .bootmp is in flash — secondary cores can't
974+ * write to flash, so the spin table MUST be in DDR. */
975+ second_half_ddr = bootpg - BOOT_ROM_SIZE ;
898976
899- /* Store location of spin table for other cores */
900- _spin_table_addr = (uint32_t )_spin_table ;
977+ /* DDR addresses for second half symbols */
978+ spin_table_ddr = second_half_ddr +
979+ ((uint32_t )_spin_table - (uint32_t )& _second_half_boot_page );
901980
902- /* Flush bootpg before copying to invalidate any stale cache lines */
981+ /* Flush DDR destination before copying */
903982 flush_cache (bootpg , BOOT_ROM_SIZE );
983+ flush_cache (second_half_ddr , BOOT_ROM_SIZE );
904984
905- /* Map reset page to bootpg so we can copy code there */
985+ /* Map reset page to bootpg so we can copy code there.
986+ * Boot page translation will redirect secondary core fetches from
987+ * 0xFFFFF000 to bootpg in DDR. */
906988 disable_tlb1 (i_tlb );
907989 set_tlb (1 , i_tlb , BOOT_ROM_ADDR , bootpg , 0 , /* tlb, epn, rpn, urpn */
908990 (MAS3_SX | MAS3_SW | MAS3_SR ), (MAS2_I | MAS2_G ), /* perms, wimge */
909991 0 , BOOKE_PAGESZ_4K , 1 ); /* ts, esel, tsize, iprot */
910992
911- /* copy startup code to virtually mapped boot address */
912- /* do not use memcpy due to compiler array bounds report (not valid) */
993+ /* Copy first half ( startup code) to DDR via BOOT_ROM_ADDR mapping.
994+ * Uses cache-inhibited TLB to ensure data reaches DDR immediately. */
913995 s = (const uint32_t * )fixup ;
914996 d = (uint32_t * )BOOT_ROM_ADDR ;
915997 for (i = 0 ; i < BOOT_ROM_SIZE /4 ; i ++ ) {
916998 d [i ] = s [i ];
917999 }
9181000
919- /* start core and wait for it to be enabled */
920- hal_mp_up (bootpg );
1001+ /* Write _bootpg_addr and _spin_table_addr into the DDR first-half copy.
1002+ * These variables are .long 0 in the linked .bootmp (flash), and direct
1003+ * stores to their flash addresses silently fail on XIP builds.
1004+ * Calculate offsets within the boot page and write via BOOT_ROM_ADDR. */
1005+ {
1006+ volatile uint32_t * bp = (volatile uint32_t * )(BOOT_ROM_ADDR +
1007+ ((uint32_t )& _bootpg_addr - (uint32_t )& _secondary_start_page ));
1008+ volatile uint32_t * st = (volatile uint32_t * )(BOOT_ROM_ADDR +
1009+ ((uint32_t )& _spin_table_addr - (uint32_t )& _secondary_start_page ));
1010+ * bp = second_half_ddr ;
1011+ * st = spin_table_ddr ;
1012+ }
1013+
1014+ /* Copy second half (spin loop + spin table) directly to DDR.
1015+ * Master has DDR TLB (entry 12, MAS2_M). Flush cache after copy
1016+ * to ensure secondary cores see the data. */
1017+ s = (const uint32_t * )& _second_half_boot_page ;
1018+ d = (uint32_t * )second_half_ddr ;
1019+ for (i = 0 ; i < BOOT_ROM_SIZE /4 ; i ++ ) {
1020+ d [i ] = s [i ];
1021+ }
1022+ flush_cache (second_half_ddr , BOOT_ROM_SIZE );
1023+
1024+ /* start cores and wait for them to be enabled */
1025+ hal_mp_up (bootpg , spin_table_ddr );
9211026}
9221027#endif /* ENABLE_MP */
9231028
0 commit comments