@@ -244,9 +244,10 @@ hardware_reg:
244244
245245#ifndef BUILD_LOADER_STAGE1
246246branch_prediction:
247- /* enable branch prediction */
248- lis r0, (BUCSR_ENABLE)@h
249- ori r0, r0, (BUCSR_ENABLE)@l
247+ /* Keep branch prediction disabled during early boot to avoid
248+ * speculative fetches during hardware init. It is enabled later,
249+ * in C code, after DDR stack relocation. */
250+ li r0, 0
250251 mtspr SPRN_BUCSR, r0
251252#endif
252253
@@ -559,15 +560,14 @@ flash_law:
559560 lwz r2, 8 (r9)
560561 isync
561562flash_tlb:
562- /* Flash: TLB 1, Entry 2, Super X/R/W, W/I/G, TS=0, 64/128M, IPROT */
563- /* Write is required for Write/Erase using CFI commands to base */
564- #ifdef BUILD_LOADER_STAGE1
565- /* Using XIP from this flash, so cannot use cache inhibit */
563+ /* Flash: TLB 1, Entry 2, Super X/R/W, W+G, TS=0, 64/128M, IPROT
564+ * Write-through (W) enables L1 I-cache to cache flash instruction
565+ * fetches during XIP boot — matches reference T2080 implementation.
566+ * Guarded (G) prevents speculative prefetches to the IFC.
567+ * After DDR stack relocation, C code switches to I|G for flash
568+ * write/erase (hal_flash_cache_disable) or M for full caching
569+ * (hal_flash_enable_caching). */
566570 #define FLASH_TLB_WING (MAS2_W | MAS2_G)
567- #else
568- /* IFC polling requires cache inhibit */
569- #define FLASH_TLB_WING (MAS2_I | MAS2_G)
570- #endif
571571 set_tlb(1 , 2 ,
572572 FLASH_BASE_ADDR, FLASH_BASE_ADDR, FLASH_BASE_PHYS_HIGH,
573573 MAS3_SX | MAS3_SW | MAS3_SR, FLASH_TLB_WING, 0 ,
@@ -917,15 +917,24 @@ cache_sram_init_loop:
917917 bdnz cache_sram_init_loop
918918#elif defined(L2SRAM_ADDR)
919919cache_sram_init:
920- /* CPC SRAM: no dcbz init needed.
921- * dcbz generates a coherent "allocate-and-zero" transaction on CoreNet.
922- * On cold power cycle this transaction hangs (CPC SRAM does not support
923- * it before the first regular store hits the SRAM). Since CPCPE (ECC)
924- * is intentionally disabled in SRAM mode, there are no parity bits to
925- * initialize. The stack stores in setup_stack below use regular stw,
926- * which CPC SRAM handles correctly. */
927- uart_putc_debug 'H' /* checkpoint H: skipping dcbz, entering setup_stack */
928- uart_putc_debug 'C' /* checkpoint C: (skipped dcbz loop) */
920+ /* Zero CPC SRAM to initialize ECC/parity for all cache lines.
921+ * Uses stdu (8-byte double-word stores with update) like the reference
922+ * T2080 implementation. dcbz cannot be used here because it generates
923+ * a coherent "allocate-and-zero" CoreNet transaction that CPC SRAM
924+ * does not support on cold power cycle. stdu generates normal store
925+ * transactions that work correctly through L1→L2→CPC SRAM.
926+ * CPCPE (ECC) is disabled in SRAM mode, so line fills of uninitialized
927+ * data do not trigger parity errors. After zeroing, all SRAM is clean
928+ * and safe for stack use. */
929+ uart_putc_debug 'H' /* checkpoint H: zeroing CPC SRAM */
930+ LOAD_ADDR32(r2, L2SRAM_ADDR - 8 ) /* bias base by -8: stdu advances r2 by 8 before each store */
931+ lis r3, (L2SRAM_SIZE >> 3 )@h /* count = size / 8 (doublewords) */
932+ ori r3, r3, (L2SRAM_SIZE >> 3 )@l
933+ mtctr r3
934+ li r3, 0
935+ 1: stdu r3, 8 (r2)
936+ bdnz 1b
937+ uart_putc_debug 'C' /* checkpoint C: SRAM zeroed */
929938#endif /* L1_CACHE_ADDR */
930939
931940setup_stack:
@@ -945,9 +954,9 @@ setup_stack:
945954 stwu r1, -8 (r1) /* Save back chain and move SP */
946955 stw r0, +12 (r1) /* Save return addr (underflow vect) */
947956
948- /* switch back to AS/TS=0 */
949- lis r3, (MSR_CE | MSR_ME | MSR_DE)@h
950- ori r3, r3, (MSR_CE | MSR_ME | MSR_DE)@l
957+ /* switch back to AS/TS=0 and set MSR[RI] (recoverable-interrupt flag) */
958+ lis r3, (MSR_CE | MSR_ME | MSR_DE | MSR_RI )@h
959+ ori r3, r3, (MSR_CE | MSR_ME | MSR_DE | MSR_RI )@l
951960 mtmsr r3
952961 isync
953962 uart_putc_debug 'D' /* checkpoint D: stack ready, entering C */
0 commit comments