Skip to content

Commit 610af12

Browse files
committed
Progress with multi-core
1 parent 61638a4 commit 610af12

2 files changed

Lines changed: 159 additions & 45 deletions

File tree

hal/nxp_t2080.c

Lines changed: 148 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,11 @@
3232
#define ENABLE_BUS_CLK_CALC
3333

3434
#ifndef BUILD_LOADER_STAGE1
35-
/* TODO: Fix e6500 MP initialization - secondary cores not responding.
36-
* Disable MP for now to focus on getting basic boot working. */
37-
/* #define ENABLE_MP */ /* multi-core support */
35+
#define ENABLE_MP /* multi-core support */
3836
#endif
3937

4038
/* Forward declarations */
41-
static void hal_flash_unlock_sector(uint32_t sector);
39+
static void RAMFUNCTION hal_flash_unlock_sector(uint32_t sector);
4240
#ifdef ENABLE_MP
4341
static void hal_mp_init(void);
4442
#endif
@@ -202,24 +200,32 @@ static int hal_flash_getid(void)
202200
static void hal_flash_init(void)
203201
{
204202
#ifdef ENABLE_IFC
203+
uint32_t cspr;
204+
205205
/* IFC CS0 - NOR Flash
206-
* Do NOT reprogram IFC CS0 (CSPR, AMASK, CSOR, FTIM) while executing
207-
* from flash (XIP) with cache-inhibited TLB (MAS2_I|MAS2_G). The boot
208-
* ROM already configured CS0 correctly. Reprogramming CSPR while XIP
209-
* can cause instruction fetch failures because there is no cache to
210-
* serve fetches during the chip-select decode transition.
211-
*
212-
* U-Boot avoids this by using MAS2_W|MAS2_G (write-through, cached)
213-
* during XIP, only switching to MAS2_I|MAS2_G after relocating to RAM.
206+
* Do NOT reprogram IFC CS0 base address, port size, AMASK, CSOR, or
207+
* FTIM while executing from flash (XIP). The boot ROM already
208+
* configured CS0 correctly.
214209
*
215-
* The LAW is also already set in boot_ppc_start.S:flash_law.
216-
*/
210+
* However, the boot ROM may set IFC_CSPR_WP (write-protect), which
211+
* blocks all write cycles to the flash. This prevents AMD command
212+
* sequences (erase/program) from reaching the chips. Clearing just
213+
* the WP bit is safe during XIP — it doesn't change chip-select
214+
* decode, only enables write forwarding. */
215+
cspr = get32(IFC_CSPR(0));
216+
#ifdef DEBUG_UART
217+
wolfBoot_printf("IFC CSPR0: 0x%x%s\n", cspr,
218+
(cspr & IFC_CSPR_WP) ? " (WP set)" : "");
219+
#endif
220+
if (cspr & IFC_CSPR_WP) {
221+
set32(IFC_CSPR(0), cspr & ~IFC_CSPR_WP);
222+
}
217223

218224
/* Note: hal_flash_getid() is disabled because AMD Autoselect mode
219225
* affects the entire flash bank. Since wolfBoot runs XIP from the same
220226
* bank (CS0), entering Autoselect mode crashes instruction fetch.
221-
* Flash write/erase operations will need RAMFUNCTION support.
222-
* TODO: Implement RAMFUNCTION for flash operations on T2080. */
227+
* Flash write/erase use RAMFUNCTION to execute from DDR during
228+
* flash command mode (after .ramcode relocation in hal_init). */
223229
#endif /* ENABLE_IFC */
224230
}
225231

@@ -630,10 +636,6 @@ void hal_init(void)
630636
#endif
631637
#endif /* ENABLE_CPLD */
632638

633-
#ifdef ENABLE_MP
634-
hal_mp_init();
635-
#endif
636-
637639
#ifdef ENABLE_DDR
638640
/* Test DDR (when DEBUG_UART enabled) */
639641
#ifdef DEBUG_UART
@@ -651,17 +653,78 @@ void hal_init(void)
651653
hal_reconfigure_cpc_as_cache();
652654
hal_flash_enable_caching();
653655
#endif
656+
657+
#ifdef ENABLE_MP
658+
/* Start secondary cores AFTER CPC release and flash caching.
659+
* Secondary cores' L2 flash-invalidate on the shared cluster L2
660+
* must not disrupt the CPC SRAM→cache transition. Starting them
661+
* after ensures the cache hierarchy is fully stable. */
662+
hal_mp_init();
663+
#endif
664+
}
665+
666+
/* RAM-resident microsecond delay using inline timebase reads.
667+
* Cannot call wait_ticks() (in flash .text) from RAMFUNCTION code
668+
* while flash is in command mode — instruction fetch would return garbage. */
669+
static void RAMFUNCTION ram_udelay(uint32_t delay_us)
670+
{
671+
uint32_t tbl_start, tbl_now;
672+
uint32_t ticks = delay_us * DELAY_US;
673+
__asm__ __volatile__("mfspr %0,268" : "=r"(tbl_start));
674+
do {
675+
__asm__ __volatile__("mfspr %0,268" : "=r"(tbl_now));
676+
} while ((tbl_now - tbl_start) < ticks);
654677
}
655678

656-
static void hal_flash_unlock_sector(uint32_t sector)
679+
/* Switch flash TLB to cache-inhibited for direct flash chip access.
680+
* AMD flash commands require writes to reach the chip immediately and
681+
* status reads to come directly from the chip. With MAS2_M (cacheable),
682+
* writes are cached and never reach the flash, reads return stale data.
683+
* Uses direct SPR manipulation to avoid calling .text functions. */
684+
static void RAMFUNCTION hal_flash_cache_disable(void)
685+
{
686+
uint32_t mas2;
687+
/* Select TLB1, entry 2 (flash) */
688+
mtspr(MAS0, BOOKE_MAS0(1, 2, 0));
689+
__asm__ __volatile__("isync; tlbre; isync");
690+
/* Change WIMGE from M to I|G */
691+
mas2 = mfspr(MAS2);
692+
mas2 &= ~0x1F; /* clear WIMGE bits */
693+
mas2 |= (MAS2_I | MAS2_G);
694+
mtspr(MAS2, mas2);
695+
__asm__ __volatile__("isync; msync; tlbwe; isync");
696+
}
697+
698+
/* Restore flash TLB to cacheable mode after flash operation.
699+
* Flash is back in read-array mode, safe to cache again. */
700+
static void RAMFUNCTION hal_flash_cache_enable(void)
701+
{
702+
uint32_t mas2;
703+
/* Select TLB1, entry 2 (flash) */
704+
mtspr(MAS0, BOOKE_MAS0(1, 2, 0));
705+
__asm__ __volatile__("isync; tlbre; isync");
706+
/* Change WIMGE from I|G to M (cacheable) */
707+
mas2 = mfspr(MAS2);
708+
mas2 &= ~0x1F;
709+
mas2 |= MAS2_M;
710+
mtspr(MAS2, mas2);
711+
__asm__ __volatile__("isync; msync; tlbwe; isync");
712+
/* Invalidate D-cache and I-cache — stale entries from before
713+
* the flash operation must be discarded */
714+
invalidate_dcache();
715+
invalidate_icache();
716+
}
717+
718+
/* Issue the standard AMD/JEDEC two-cycle unlock preamble to the
 * given sector, arming the chip for a following command byte. */
static void RAMFUNCTION hal_flash_unlock_sector(uint32_t sector)
{
    FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR1, AMD_CMD_UNLOCK_START);
    FLASH_IO8_WRITE(sector, FLASH_UNLOCK_ADDR2, AMD_CMD_UNLOCK_ACK);
}
662724

663-
/* wait for toggle to stop and status mask to be met within microsecond timeout */
664-
static int hal_flash_status_wait(uint32_t sector, uint16_t mask,
725+
/* wait for toggle to stop and status mask to be met within microsecond timeout.
726+
* RAMFUNCTION: executes from DDR while flash is in program/erase command mode. */
727+
static int RAMFUNCTION hal_flash_status_wait(uint32_t sector, uint16_t mask,
665728
uint32_t timeout_us)
666729
{
667730
int ret = 0;
@@ -682,7 +745,7 @@ static int hal_flash_status_wait(uint32_t sector, uint16_t mask,
682745
#endif
683746
if (read1 == read2 && ((read1 & mask) == mask))
684747
break;
685-
udelay(1);
748+
ram_udelay(1);
686749
} while (timeout++ < timeout_us);
687750
if (timeout >= timeout_us) {
688751
ret = -1; /* timeout */
@@ -694,7 +757,7 @@ static int hal_flash_status_wait(uint32_t sector, uint16_t mask,
694757
return ret;
695758
}
696759

697-
int hal_flash_write(uint32_t address, const uint8_t *data, int len)
760+
int RAMFUNCTION hal_flash_write(uint32_t address, const uint8_t *data, int len)
698761
{
699762
int ret = 0;
700763
uint32_t i, pos, sector, offset, xfer, nwords;
@@ -708,6 +771,9 @@ int hal_flash_write(uint32_t address, const uint8_t *data, int len)
708771
data, address, len);
709772
#endif
710773

774+
/* Disable flash caching — AMD commands must reach the chip directly */
775+
hal_flash_cache_disable();
776+
711777
pos = 0;
712778
while (len > 0) {
713779
/* determine sector address */
@@ -754,10 +820,13 @@ int hal_flash_write(uint32_t address, const uint8_t *data, int len)
754820
address += xfer;
755821
len -= xfer;
756822
}
823+
824+
/* Restore flash caching — flash is back in read-array mode */
825+
hal_flash_cache_enable();
757826
return ret;
758827
}
759828

760-
int hal_flash_erase(uint32_t address, int len)
829+
int RAMFUNCTION hal_flash_erase(uint32_t address, int len)
761830
{
762831
int ret = 0;
763832
uint32_t sector;
@@ -766,6 +835,9 @@ int hal_flash_erase(uint32_t address, int len)
766835
if (address >= FLASH_BASE_ADDR)
767836
address -= FLASH_BASE_ADDR;
768837

838+
/* Disable flash caching — AMD commands must reach the chip directly */
839+
hal_flash_cache_disable();
840+
769841
while (len > 0) {
770842
/* determine sector address */
771843
sector = (address / FLASH_SECTOR_SIZE);
@@ -792,10 +864,13 @@ int hal_flash_erase(uint32_t address, int len)
792864
address += FLASH_SECTOR_SIZE;
793865
len -= FLASH_SECTOR_SIZE;
794866
}
867+
868+
/* Restore flash caching — flash is back in read-array mode */
869+
hal_flash_cache_enable();
795870
return ret;
796871
}
797872

798-
void hal_flash_unlock(void)
873+
void RAMFUNCTION hal_flash_unlock(void)
799874
{
800875
/* Per-sector unlock is done in hal_flash_write/erase before each operation.
801876
* The previous non-volatile PPB protection mode (C0h) approach caused
@@ -818,8 +893,9 @@ extern uint32_t _spin_table[];
818893
extern uint32_t _spin_table_addr;
819894
extern uint32_t _bootpg_addr;
820895

821-
/* Startup additional cores with spin table and synchronize the timebase */
822-
static void hal_mp_up(uint32_t bootpg)
896+
/* Startup additional cores with spin table and synchronize the timebase.
897+
* spin_table_ddr: DDR address of the spin table (for checking status) */
898+
static void hal_mp_up(uint32_t bootpg, uint32_t spin_table_ddr)
823899
{
824900
uint32_t all_cores, active_cores, whoami;
825901
int timeout = 50, i;
@@ -829,7 +905,7 @@ static void hal_mp_up(uint32_t bootpg)
829905
active_cores = (1 << whoami); /* current running cores */
830906

831907
wolfBoot_printf("MP: Starting cores (boot page %p, spin table %p)\n",
832-
bootpg, (uint32_t)_spin_table);
908+
bootpg, spin_table_ddr);
833909

834910
/* Set the boot page translation register */
835911
set32(LCC_BSTRH, 0);
@@ -849,8 +925,8 @@ static void hal_mp_up(uint32_t bootpg)
849925
/* wait for other core(s) to start */
850926
while (timeout) {
851927
for (i = 0; i < CPU_NUMCORES; i++) {
852-
uint32_t* entry = (uint32_t*)(
853-
(uint8_t*)_spin_table + (i * ENTRY_SIZE) + ENTRY_ADDR_LOWER);
928+
volatile uint32_t* entry = (volatile uint32_t*)(
929+
spin_table_ddr + (i * ENTRY_SIZE) + ENTRY_ADDR_LOWER);
854930
if (*entry) {
855931
active_cores |= (1 << i);
856932
}
@@ -881,7 +957,7 @@ static void hal_mp_up(uint32_t bootpg)
881957
static void hal_mp_init(void)
882958
{
883959
uint32_t *fixup = (uint32_t*)&_secondary_start_page;
884-
uint32_t bootpg;
960+
uint32_t bootpg, second_half_ddr, spin_table_ddr;
885961
int i_tlb = 0; /* always 0 */
886962
size_t i;
887963
const volatile uint32_t *s;
@@ -893,31 +969,60 @@ static void hal_mp_init(void)
893969
* size to ensure bootpg fits in 32 bits and is accessible. */
894970
bootpg = DDR_ADDRESS + 0x80000000UL - BOOT_ROM_SIZE;
895971

896-
/* Store the boot page address for use by additional CPU cores */
897-
_bootpg_addr = (uint32_t)&_second_half_boot_page;
972+
/* Second half boot page (spin loop + spin table) goes just below.
973+
* For XIP flash builds, .bootmp is in flash — secondary cores can't
974+
* write to flash, so the spin table MUST be in DDR. */
975+
second_half_ddr = bootpg - BOOT_ROM_SIZE;
898976

899-
/* Store location of spin table for other cores */
900-
_spin_table_addr = (uint32_t)_spin_table;
977+
/* DDR addresses for second half symbols */
978+
spin_table_ddr = second_half_ddr +
979+
((uint32_t)_spin_table - (uint32_t)&_second_half_boot_page);
901980

902-
/* Flush bootpg before copying to invalidate any stale cache lines */
981+
/* Flush DDR destination before copying */
903982
flush_cache(bootpg, BOOT_ROM_SIZE);
983+
flush_cache(second_half_ddr, BOOT_ROM_SIZE);
904984

905-
/* Map reset page to bootpg so we can copy code there */
985+
/* Map reset page to bootpg so we can copy code there.
986+
* Boot page translation will redirect secondary core fetches from
987+
* 0xFFFFF000 to bootpg in DDR. */
906988
disable_tlb1(i_tlb);
907989
set_tlb(1, i_tlb, BOOT_ROM_ADDR, bootpg, 0, /* tlb, epn, rpn, urpn */
908990
(MAS3_SX | MAS3_SW | MAS3_SR), (MAS2_I | MAS2_G), /* perms, wimge */
909991
0, BOOKE_PAGESZ_4K, 1); /* ts, esel, tsize, iprot */
910992

911-
/* copy startup code to virtually mapped boot address */
912-
/* do not use memcpy due to compiler array bounds report (not valid) */
993+
/* Copy first half (startup code) to DDR via BOOT_ROM_ADDR mapping.
994+
* Uses cache-inhibited TLB to ensure data reaches DDR immediately. */
913995
s = (const uint32_t*)fixup;
914996
d = (uint32_t*)BOOT_ROM_ADDR;
915997
for (i = 0; i < BOOT_ROM_SIZE/4; i++) {
916998
d[i] = s[i];
917999
}
9181000

919-
/* start core and wait for it to be enabled */
920-
hal_mp_up(bootpg);
1001+
/* Write _bootpg_addr and _spin_table_addr into the DDR first-half copy.
1002+
* These variables are .long 0 in the linked .bootmp (flash), and direct
1003+
* stores to their flash addresses silently fail on XIP builds.
1004+
* Calculate offsets within the boot page and write via BOOT_ROM_ADDR. */
1005+
{
1006+
volatile uint32_t *bp = (volatile uint32_t*)(BOOT_ROM_ADDR +
1007+
((uint32_t)&_bootpg_addr - (uint32_t)&_secondary_start_page));
1008+
volatile uint32_t *st = (volatile uint32_t*)(BOOT_ROM_ADDR +
1009+
((uint32_t)&_spin_table_addr - (uint32_t)&_secondary_start_page));
1010+
*bp = second_half_ddr;
1011+
*st = spin_table_ddr;
1012+
}
1013+
1014+
/* Copy second half (spin loop + spin table) directly to DDR.
1015+
* Master has DDR TLB (entry 12, MAS2_M). Flush cache after copy
1016+
* to ensure secondary cores see the data. */
1017+
s = (const uint32_t*)&_second_half_boot_page;
1018+
d = (uint32_t*)second_half_ddr;
1019+
for (i = 0; i < BOOT_ROM_SIZE/4; i++) {
1020+
d[i] = s[i];
1021+
}
1022+
flush_cache(second_half_ddr, BOOT_ROM_SIZE);
1023+
1024+
/* start cores and wait for them to be enabled */
1025+
hal_mp_up(bootpg, spin_table_ddr);
9211026
}
9221027
#endif /* ENABLE_MP */
9231028

src/boot_ppc_mp.S

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -110,8 +110,8 @@ branch_prediction:
110110
srwi r10, r0, 5 /* r10 = cluster */
111111

112112
mulli r5, r10, CORES_PER_CLUSTER
113-
add r5, r5, r8
114-
mulli r4, r5, CORES_PER_CLUSTER
113+
add r5, r5, r8 /* r5 = linear core ID */
114+
mr r4, r5 /* r4 = PIR = linear core ID */
115115
#elif defined(CORE_E500MC) /* BOOKE e500mc family */
116116
rlwinm r4, r0, 27, 27, 31
117117
mr r5, r4
@@ -132,6 +132,15 @@ branch_prediction:
132132
mtspr L1CSR2, r8
133133

134134
#if defined(CORE_E6500) /* --- L2 E6500 --- */
135+
ccsr_tlb_mp:
136+
/* e6500 L2 uses memory-mapped CCSR registers (L2_CLUSTER_BASE).
137+
* Secondary cores have no TLBs on entry — only the boot page
138+
* translation provides initial access. Add a temporary CCSR
139+
* mapping (TLB1 entry 2) so L2 setup can access the registers. */
140+
set_tlb(1, 2,
141+
CCSRBAR, CCSRBAR, CCSRBAR_PHYS_HIGH,
142+
MAS3_SW | MAS3_SR, MAS2_I | MAS2_G, 0,
143+
CCSRBAR_SIZE, 0, r11)
135144
l2_setup_cache:
136145
/* E6500CORERM: 11.7 L2 cache state */
137146
/* R5 = L2 cluster 1 base */

0 commit comments

Comments
 (0)