@@ -172,6 +172,13 @@ static void udelay(uint32_t delay_us)
172172 wait_ticks (delay_us * DELAY_US );
173173}
174174
175+ /* Timebase ticks per microsecond, pre-computed for ram_udelay() (RAMFUNCTION).
176+ * Assigned from DELAY_US in hal_flash_init(); must be set before any flash
177+ * operation begins. ram_udelay() cannot evaluate the DELAY_US macro itself
178+ * because it expands to hal_get_plat_clk(), which lives in flash .text — during
179+ * flash command mode, instruction fetch from flash returns status data, not code. */
180+ static uint32_t ram_delay_ticks ;
181+
175182#if defined(ENABLE_IFC ) && !defined(BUILD_LOADER_STAGE1 )
176183static int hal_flash_getid (void )
177184{
@@ -228,6 +235,12 @@ static void hal_flash_init(void)
228235 * Flash write/erase use RAMFUNCTION to execute from DDR during
229236 * flash command mode (after .ramcode relocation in hal_init). */
230237#endif /* ENABLE_IFC */
238+
239+ /* Pre-compute timebase ticks per microsecond for ram_udelay().
240+ * Must be done while flash is in read mode — DELAY_US macro expands to
241+ * hal_get_plat_clk() which is in flash .text and cannot be called from
242+ * RAMFUNCTION code during flash command mode. */
243+ ram_delay_ticks = DELAY_US ;
231244}
232245
233246void hal_ddr_init (void )
@@ -688,46 +701,103 @@ void hal_init(void)
688701#endif
689702}
690703
704+ /* Direct UART character output from RAMFUNCTION code.
705+ * Uses inline get8/set8 to access UART registers in CCSR space (own TLB).
706+ * Safe to call during flash command mode — no flash code involved. */
707+ static void RAMFUNCTION ram_putchar (char c )
708+ {
709+ while (!(get8 (UART_LSR (UART_SEL )) & UART_LSR_THRE ))
710+ ;
711+ set8 (UART_THR (UART_SEL ), c );
712+ }
713+ static void RAMFUNCTION ram_puthex4 (uint8_t val )
714+ {
715+ val &= 0xF ;
716+ ram_putchar (val < 10 ? '0' + val : 'A' + val - 10 );
717+ }
718+ static void RAMFUNCTION ram_puthex16 (uint16_t val )
719+ {
720+ ram_puthex4 (val >> 12 );
721+ ram_puthex4 (val >> 8 );
722+ ram_puthex4 (val >> 4 );
723+ ram_puthex4 (val );
724+ }
725+
691726/* RAM-resident microsecond delay using inline timebase reads.
692- * Cannot call wait_ticks() (in flash .text) from RAMFUNCTION code
693- * while flash is in command mode — instruction fetch would return garbage. */
727+ * Uses pre-computed ram_delay_ticks (initialized before flash ops).
728+ * Cannot call DELAY_US macro — it expands to hal_get_plat_clk() in flash
729+ * .text. The linker generates a trampoline that jumps to flash; during
730+ * flash command mode, instruction fetch returns status data → crash. */
694731static void RAMFUNCTION ram_udelay (uint32_t delay_us )
695732{
696733 uint32_t tbl_start , tbl_now ;
697- uint32_t ticks = delay_us * DELAY_US ;
734+ uint32_t ticks = delay_us * ram_delay_ticks ;
698735 __asm__ __volatile__("mfspr %0,268" : "=r" (tbl_start ));
699736 do {
700737 __asm__ __volatile__("mfspr %0,268" : "=r" (tbl_now ));
701738 } while ((tbl_now - tbl_start ) < ticks );
702739}
703740
741+ /* Inline TLB write — all RAMFUNCTION flash code must avoid calling set_tlb(),
742+ * invalidate_dcache(), invalidate_icache() because those live in flash .text.
743+ * During flash command mode, I-cache misses to flash return status data instead
744+ * of instructions → PIL exception. Using mtspr/mfspr macros (inline asm) keeps
745+ * everything in DDR .ramcode. */
746+ static void RAMFUNCTION ram_write_tlb (uint32_t mas0 , uint32_t mas1 ,
747+ uint32_t mas2 , uint32_t mas3 , uint32_t mas7 )
748+ {
749+ mtspr (MAS0 , mas0 );
750+ mtspr (MAS1 , mas1 );
751+ mtspr (MAS2 , mas2 );
752+ mtspr (MAS3 , mas3 );
753+ mtspr (MAS7 , mas7 );
754+ __asm__ __volatile__("isync; msync; tlbwe; isync" );
755+ }
756+
704757/* Switch flash TLB to cache-inhibited + guarded for direct flash chip access.
705758 * AMD flash commands require writes to reach the chip immediately and status
706759 * reads to come directly from the chip. With MAS2_M (cacheable), stores go
707760 * through the CPC coherency fabric; IFC does not support coherent writes and
708- * returns a bus error (DSI). tlbre/tlbwe only modifies MAS2 and is unreliable
709- * when the entry has IPROT=1; use set_tlb() to rewrite all MAS fields.
710- * Must be called while flash is still in read-array mode (set_tlb lives in
711- * flash .text, reachable via longcall while TLB is still M/cacheable). */
761+ * returns a bus error (DSI).
762+ * Fully inlined — no calls to flash-resident set_tlb(). */
712763static void RAMFUNCTION hal_flash_cache_disable (void )
713764{
714- set_tlb (1 , 2 ,
715- FLASH_BASE_ADDR , FLASH_BASE_ADDR , FLASH_BASE_PHYS_HIGH ,
716- MAS3_SX | MAS3_SW | MAS3_SR , MAS2_I | MAS2_G , 0 ,
717- FLASH_TLB_PAGESZ , 1 );
765+ ram_write_tlb (
766+ BOOKE_MAS0 (1 , 2 , 0 ),
767+ BOOKE_MAS1 (1 , 1 , 0 , 0 , FLASH_TLB_PAGESZ ),
768+ BOOKE_MAS2 (FLASH_BASE_ADDR , MAS2_I | MAS2_G ),
769+ BOOKE_MAS3 (FLASH_BASE_ADDR , 0 , MAS3_SX | MAS3_SW | MAS3_SR ),
770+ BOOKE_MAS7 (FLASH_BASE_PHYS_HIGH ));
718771}
719772
720773/* Restore flash TLB to cacheable mode after flash operation.
721774 * Flash must be back in read-array mode before calling (AMD_CMD_RESET sent).
722- * Invalidate caches afterward so stale pre-erase data is not served. */
775+ * Invalidate caches afterward so stale pre-erase data is not served.
776+ * Fully inlined — no calls to flash-resident functions. */
723777static void RAMFUNCTION hal_flash_cache_enable (void )
724778{
725- set_tlb (1 , 2 ,
726- FLASH_BASE_ADDR , FLASH_BASE_ADDR , FLASH_BASE_PHYS_HIGH ,
727- MAS3_SX | MAS3_SW | MAS3_SR , MAS2_M , 0 ,
728- FLASH_TLB_PAGESZ , 1 );
729- invalidate_dcache ();
730- invalidate_icache ();
779+ uint32_t val ;
780+
781+ ram_write_tlb (
782+ BOOKE_MAS0 (1 , 2 , 0 ),
783+ BOOKE_MAS1 (1 , 1 , 0 , 0 , FLASH_TLB_PAGESZ ),
784+ BOOKE_MAS2 (FLASH_BASE_ADDR , MAS2_M ),
785+ BOOKE_MAS3 (FLASH_BASE_ADDR , 0 , MAS3_SX | MAS3_SW | MAS3_SR ),
786+ BOOKE_MAS7 (FLASH_BASE_PHYS_HIGH ));
787+
788+ /* Inline invalidate_dcache() — L1CSR0.CFI */
789+ val = mfspr (L1CSR0 );
790+ val |= L1CSR_CFI ;
791+ __asm__ __volatile__("msync; isync" );
792+ mtspr (L1CSR0 , val );
793+ __asm__ __volatile__("isync" );
794+
795+ /* Inline invalidate_icache() — L1CSR1.CFI */
796+ val = mfspr (L1CSR1 );
797+ val |= L1CSR_CFI ;
798+ __asm__ __volatile__("msync; isync" );
799+ mtspr (L1CSR1 , val );
800+ __asm__ __volatile__("isync" );
731801}
732802
733803/* Clear IFC write-protect. T2080RM says IFC_CSPR should only be written
@@ -847,38 +917,52 @@ static int RAMFUNCTION hal_flash_status_wait(uint32_t sector, uint16_t mask,
847917 uint32_t timeout = 0 ;
848918 uint16_t read1 , read2 ;
849919
850- /* Replicate 8-bit AMD status mask to both bytes for parallel chips */
920+ /* Replicate 8-bit AMD toggle bit to both bytes for parallel chips */
851921#if FLASH_CFI_WIDTH == 16
852- uint16_t mask16 = (mask << 8 ) | mask ;
853922 uint16_t toggle16 = (AMD_STATUS_TOGGLE << 8 ) | AMD_STATUS_TOGGLE ;
854923#else
855- uint16_t mask16 = mask ;
856924 uint16_t toggle16 = AMD_STATUS_TOGGLE ;
857925#endif
926+ (void )mask ; /* mask parameter reserved for future DQ7 data polling */
858927
859928 do {
860- /* detection of completion happens when reading status bits
861- * DQ6 and DQ2 stop toggling (0x44) */
929+ /* AMD toggle detection: DQ6 toggles on consecutive reads during
930+ * program/erase. When the operation completes, DQ6 reflects actual
931+ * data and consecutive reads return the same value.
932+ * NOTE: Do NOT check programmed data bits against a mask here —
933+ * after write completes, the data depends on what was written, not
934+ * on any fixed status bits. Only erase guarantees 0xFF data. */
862935#if FLASH_CFI_WIDTH == 16
863936 read1 = FLASH_IO16_READ (sector , 0 );
864- if ((read1 & toggle16 ) == 0 )
865- read1 = FLASH_IO16_READ (sector , 0 );
866937 read2 = FLASH_IO16_READ (sector , 0 );
867- if ((read2 & toggle16 ) == 0 )
868- read2 = FLASH_IO16_READ (sector , 0 );
869938#else
870939 read1 = FLASH_IO8_READ (sector , 0 );
871- if ((read1 & toggle16 ) == 0 )
872- read1 = FLASH_IO8_READ (sector , 0 );
873940 read2 = FLASH_IO8_READ (sector , 0 );
874- if ((read2 & toggle16 ) == 0 )
875- read2 = FLASH_IO8_READ (sector , 0 );
876941#endif
942+ /* Print first iteration reads for diagnostics */
943+ if (timeout == 0 ) {
944+ ram_putchar ('[' );
945+ ram_puthex16 (read1 );
946+ ram_putchar (':' );
947+ ram_puthex16 (read2 );
948+ ram_putchar (']' );
949+ }
877950 #ifdef DEBUG_FLASH
878951 wolfBoot_printf ("Wait toggle %x -> %x\n" , read1 , read2 );
879952 #endif
880- if (read1 == read2 && ((read1 & mask16 ) == mask16 ))
953+ /* DQ6 stopped toggling → operation complete */
954+ if (((read1 ^ read2 ) & toggle16 ) == 0 )
881955 break ;
956+ /* Check DQ5 (error) on both chips while still toggling */
957+ if (read1 & ((AMD_STATUS_ERROR << 8 ) | AMD_STATUS_ERROR )) {
958+ /* Read one more time to confirm it's not a false DQ5 */
959+ read1 = FLASH_IO16_READ (sector , 0 );
960+ read2 = FLASH_IO16_READ (sector , 0 );
961+ if (((read1 ^ read2 ) & toggle16 ) == 0 )
962+ break ; /* toggle stopped — was a race, not an error */
963+ ret = -2 ; /* DQ5 error — program/erase failed */
964+ break ;
965+ }
882966 ram_udelay (1 );
883967 } while (timeout ++ < timeout_us );
884968 if (timeout >= timeout_us ) {
@@ -914,13 +998,15 @@ int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len)
914998#endif
915999
9161000 /* Disable flash caching — AMD commands must reach the chip directly */
1001+ ram_putchar ('w' ); /* checkpoint: entering write */
9171002 hal_flash_cache_disable ();
9181003 hal_flash_clear_wp ();
9191004
9201005 /* Reset flash to read-array mode in case previous operation left it
9211006 * in command mode (e.g. after a timeout or incomplete operation) */
9221007 FLASH_IO8_WRITE (0 , 0 , AMD_CMD_RESET );
9231008 ram_udelay (50 );
1009+ ram_putchar ('r' ); /* checkpoint: reset done, starting loop */
9241010
9251011 pos = 0 ;
9261012 while (len > 0 ) {
@@ -939,9 +1025,10 @@ int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len)
9391025 #endif
9401026
9411027 hal_flash_unlock_sector (sector );
1028+ ram_putchar ('u' ); /* checkpoint: unlock done */
9421029 FLASH_IO8_WRITE (sector , offset , AMD_CMD_WRITE_TO_BUFFER );
943- /* Word count (N-1) must be replicated to both chips */
9441030 FLASH_IO8_WRITE (sector , offset , (nwords - 1 ));
1031+ ram_putchar ('b' ); /* checkpoint: buffer cmd + count sent */
9451032
9461033 for (i = 0 ; i < nwords ; i ++ ) {
9471034 const uint8_t * ptr = & data [pos ];
@@ -952,23 +1039,43 @@ int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len)
9521039 #endif
9531040 pos += (FLASH_CFI_WIDTH /8 );
9541041 }
1042+ ram_putchar ('l' ); /* checkpoint: data loaded */
1043+ /* Ensure all data stores reach IFC before confirm */
1044+ __asm__ __volatile__("sync" ::: "memory" );
9551045 FLASH_IO8_WRITE (sector , offset , AMD_CMD_WRITE_BUFFER_CONFIRM );
956- /* Typical 410us */
1046+ /* Ensure confirm write reaches flash before polling */
1047+ __asm__ __volatile__("sync; isync" ::: "memory" );
1048+ ram_putchar ('c' ); /* checkpoint: confirm sent */
9571049
958- /* poll for program completion - max 200ms */
1050+ /* poll for program completion - max 200ms (typical 410us) */
9591051 ret = hal_flash_status_wait (sector , 0x44 , 200 * 1000 );
1052+ if (ret == 0 ) {
1053+ ram_putchar ('p' ); /* pass */
1054+ } else if (ret == -2 ) {
1055+ ram_putchar ('E' ); /* DQ5 error */
1056+ } else {
1057+ ram_putchar ('T' ); /* timeout */
1058+ }
9601059 if (ret != 0 ) {
961- /* Reset flash to read-array mode BEFORE calling printf */
1060+ uint16_t readback ;
1061+ /* Reset flash to read-array mode BEFORE reading back data */
9621062 FLASH_IO8_WRITE (sector , 0 , AMD_CMD_RESET );
9631063 ram_udelay (50 );
964- wolfBoot_printf ("Flash Write: Timeout at sector %d\n" , sector );
1064+ /* Read back offset 0 to see if data was actually written */
1065+ readback = FLASH_IO16_READ (sector , 0 );
1066+ ram_putchar ('=' );
1067+ ram_puthex16 (readback );
1068+ wolfBoot_printf ("\nFlash Write: %s at sector %d (ret %d)\n" ,
1069+ ret == -2 ? "DQ5 error" : "Timeout" , sector , ret );
9651070 break ;
9661071 }
1072+ ram_putchar ('.' ); /* checkpoint: page write OK */
9671073
9681074 address += xfer ;
9691075 len -= xfer ;
9701076 }
9711077
1078+ ram_putchar ('d' ); /* checkpoint: write loop done */
9721079 /* Restore flash caching — flash is back in read-array mode */
9731080 hal_flash_cache_enable ();
9741081 return ret ;
@@ -1118,15 +1225,24 @@ static void hal_mp_up(uint32_t bootpg, uint32_t spin_table_ddr)
11181225 wolfBoot_printf ("MP: Timeout enabling additional cores!\n" );
11191226 }
11201227
1121- /* Disable all timebases */
1122- set32 (RCPM_PCTBENR , 0 );
1228+ /* Synchronize and reset timebase across all cores.
1229+ * On e6500, mtspr to TBL/TBU (SPR 284/285) may cause an illegal
1230+ * instruction exception — skip timebase reset if secondary cores
1231+ * did not start (timebase sync only matters for multi-core). */
1232+ if ((active_cores & all_cores ) == all_cores ) {
1233+ /* Disable all timebases */
1234+ set32 (RCPM_PCTBENR , 0 );
11231235
1124- /* Reset our timebase */
1125- mtspr (SPRN_TBWU , 0 );
1126- mtspr (SPRN_TBWL , 0 );
1236+ /* Reset our timebase */
1237+ mtspr (SPRN_TBWU , 0 );
1238+ mtspr (SPRN_TBWL , 0 );
11271239
1128- /* Enable timebase for all cores */
1129- set32 (RCPM_PCTBENR , all_cores );
1240+ /* Enable timebase for all cores */
1241+ set32 (RCPM_PCTBENR , all_cores );
1242+ } else {
1243+ /* Only re-enable timebase for boot core */
1244+ set32 (RCPM_PCTBENR , (1 << whoami ));
1245+ }
11301246}
11311247
11321248static void hal_mp_init (void )
0 commit comments